Commit 2bf541ff authored by Maarten Lankhorst's avatar Maarten Lankhorst Committed by Joonas Lahtinen

drm/i915: Pin engine before pinning all objects, v5.

We want to lock all gem objects, including the engine context objects,
rework the throttling to ensure that we can do this. Now we only throttle
once, but can take eb_pin_engine while acquiring objects. This means we
will have to drop the lock to wait. If we don't have to throttle we can
still take the fastpath, if not we will take the slowpath and wait for
the throttle request while unlocked.

The engine has to be pinned as first step, otherwise gpu relocations
won't work.

Changes since v1:
- Only need to get a throttled request in the fastpath, no need for
  a global flag any more.
- Always free the waited request correctly.
Changes since v2:
- Use intel_engine_pm_get()/put() to keep engine pool alive during
  EDEADLK handling.
Changes since v3:
- Fix small rq leak.
Changes since v4:
- Use a single reloc_context, for intel_context_pin_ww().
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200819140904.1708856-13-maarten.lankhorst@linux.intel.com
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
parent b49a7d51
...@@ -56,7 +56,8 @@ enum { ...@@ -56,7 +56,8 @@ enum {
#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE) #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
#define __EXEC_HAS_RELOC BIT(31) #define __EXEC_HAS_RELOC BIT(31)
#define __EXEC_INTERNAL_FLAGS (~0u << 31) #define __EXEC_ENGINE_PINNED BIT(30)
#define __EXEC_INTERNAL_FLAGS (~0u << 30)
#define UPDATE PIN_OFFSET_FIXED #define UPDATE PIN_OFFSET_FIXED
#define BATCH_OFFSET_BIAS (256*1024) #define BATCH_OFFSET_BIAS (256*1024)
...@@ -281,6 +282,7 @@ struct i915_execbuffer { ...@@ -281,6 +282,7 @@ struct i915_execbuffer {
} reloc_cache; } reloc_cache;
struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */ struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */
struct intel_context *reloc_context;
u64 invalid_flags; /** Set of execobj.flags that are invalid */ u64 invalid_flags; /** Set of execobj.flags that are invalid */
u32 context_flags; /** Set of execobj.flags to insert from the ctx */ u32 context_flags; /** Set of execobj.flags to insert from the ctx */
...@@ -303,6 +305,9 @@ struct i915_execbuffer { ...@@ -303,6 +305,9 @@ struct i915_execbuffer {
}; };
static int eb_parse(struct i915_execbuffer *eb); static int eb_parse(struct i915_execbuffer *eb);
static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb,
bool throttle);
static void eb_unpin_engine(struct i915_execbuffer *eb);
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
{ {
...@@ -935,7 +940,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) ...@@ -935,7 +940,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
} }
} }
static void eb_release_vmas(const struct i915_execbuffer *eb, bool final) static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
{ {
const unsigned int count = eb->buffer_count; const unsigned int count = eb->buffer_count;
unsigned int i; unsigned int i;
...@@ -952,6 +957,8 @@ static void eb_release_vmas(const struct i915_execbuffer *eb, bool final) ...@@ -952,6 +957,8 @@ static void eb_release_vmas(const struct i915_execbuffer *eb, bool final)
if (final) if (final)
i915_vma_put(vma); i915_vma_put(vma);
} }
eb_unpin_engine(eb);
} }
static void eb_destroy(const struct i915_execbuffer *eb) static void eb_destroy(const struct i915_execbuffer *eb)
...@@ -1292,19 +1299,26 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, ...@@ -1292,19 +1299,26 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
if (engine == eb->context->engine) { if (engine == eb->context->engine) {
rq = i915_request_create(eb->context); rq = i915_request_create(eb->context);
} else { } else {
struct intel_context *ce; struct intel_context *ce = eb->reloc_context;
ce = intel_context_create(engine); if (!ce) {
if (IS_ERR(ce)) { ce = intel_context_create(engine);
err = PTR_ERR(ce); if (IS_ERR(ce)) {
goto err_unpin; err = PTR_ERR(ce);
goto err_unpin;
}
i915_vm_put(ce->vm);
ce->vm = i915_vm_get(eb->context->vm);
eb->reloc_context = ce;
} }
i915_vm_put(ce->vm); err = intel_context_pin(ce);
ce->vm = i915_vm_get(eb->context->vm); if (err)
goto err_unpin;
rq = intel_context_create_request(ce); rq = i915_request_create(ce);
intel_context_put(ce); intel_context_unpin(ce);
} }
if (IS_ERR(rq)) { if (IS_ERR(rq)) {
err = PTR_ERR(rq); err = PTR_ERR(rq);
...@@ -1871,7 +1885,8 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb) ...@@ -1871,7 +1885,8 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb)
return 0; return 0;
} }
static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb) static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
struct i915_request *rq)
{ {
bool have_copy = false; bool have_copy = false;
struct eb_vma *ev; struct eb_vma *ev;
...@@ -1887,6 +1902,21 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb) ...@@ -1887,6 +1902,21 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
eb_release_vmas(eb, false); eb_release_vmas(eb, false);
i915_gem_ww_ctx_fini(&eb->ww); i915_gem_ww_ctx_fini(&eb->ww);
if (rq) {
/* nonblocking is always false */
if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT) < 0) {
i915_request_put(rq);
rq = NULL;
err = -EINTR;
goto err_relock;
}
i915_request_put(rq);
rq = NULL;
}
/* /*
* We take 3 passes through the slowpatch. * We take 3 passes through the slowpatch.
* *
...@@ -1910,14 +1940,25 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb) ...@@ -1910,14 +1940,25 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
err = 0; err = 0;
} }
flush_workqueue(eb->i915->mm.userptr_wq); if (!err)
flush_workqueue(eb->i915->mm.userptr_wq);
err_relock:
i915_gem_ww_ctx_init(&eb->ww, true); i915_gem_ww_ctx_init(&eb->ww, true);
if (err) if (err)
goto out; goto out;
/* reacquire the objects */ /* reacquire the objects */
repeat_validate: repeat_validate:
rq = eb_pin_engine(eb, false);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err;
}
/* We didn't throttle, should be NULL */
GEM_WARN_ON(rq);
err = eb_validate_vmas(eb); err = eb_validate_vmas(eb);
if (err) if (err)
goto err; goto err;
...@@ -1988,14 +2029,49 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb) ...@@ -1988,14 +2029,49 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
} }
} }
if (rq)
i915_request_put(rq);
return err; return err;
} }
static int eb_relocate_parse(struct i915_execbuffer *eb) static int eb_relocate_parse(struct i915_execbuffer *eb)
{ {
int err; int err;
struct i915_request *rq = NULL;
bool throttle = true;
retry: retry:
rq = eb_pin_engine(eb, throttle);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
rq = NULL;
if (err != -EDEADLK)
return err;
goto err;
}
if (rq) {
bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
/* Need to drop all locks now for throttling, take slowpath */
err = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 0);
if (err == -ETIME) {
if (nonblock) {
err = -EWOULDBLOCK;
i915_request_put(rq);
goto err;
}
goto slow;
}
i915_request_put(rq);
rq = NULL;
}
/* only throttle once, even if we didn't need to throttle */
throttle = false;
err = eb_validate_vmas(eb); err = eb_validate_vmas(eb);
if (err == -EAGAIN) if (err == -EAGAIN)
goto slow; goto slow;
...@@ -2032,7 +2108,7 @@ static int eb_relocate_parse(struct i915_execbuffer *eb) ...@@ -2032,7 +2108,7 @@ static int eb_relocate_parse(struct i915_execbuffer *eb)
return err; return err;
slow: slow:
err = eb_relocate_parse_slow(eb); err = eb_relocate_parse_slow(eb, rq);
if (err) if (err)
/* /*
* If the user expects the execobject.offset and * If the user expects the execobject.offset and
...@@ -2486,7 +2562,7 @@ static const enum intel_engine_id user_ring_map[] = { ...@@ -2486,7 +2562,7 @@ static const enum intel_engine_id user_ring_map[] = {
[I915_EXEC_VEBOX] = VECS0 [I915_EXEC_VEBOX] = VECS0
}; };
static struct i915_request *eb_throttle(struct intel_context *ce) static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce)
{ {
struct intel_ring *ring = ce->ring; struct intel_ring *ring = ce->ring;
struct intel_timeline *tl = ce->timeline; struct intel_timeline *tl = ce->timeline;
...@@ -2520,22 +2596,17 @@ static struct i915_request *eb_throttle(struct intel_context *ce) ...@@ -2520,22 +2596,17 @@ static struct i915_request *eb_throttle(struct intel_context *ce)
return i915_request_get(rq); return i915_request_get(rq);
} }
static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throttle)
{ {
struct intel_context *ce = eb->context;
struct intel_timeline *tl; struct intel_timeline *tl;
struct i915_request *rq; struct i915_request *rq = NULL;
int err; int err;
/* GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED);
* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
* EIO if the GPU is already wedged.
*/
err = intel_gt_terminally_wedged(ce->engine->gt);
if (err)
return err;
if (unlikely(intel_context_is_banned(ce))) if (unlikely(intel_context_is_banned(ce)))
return -EIO; return ERR_PTR(-EIO);
/* /*
* Pinning the contexts may generate requests in order to acquire * Pinning the contexts may generate requests in order to acquire
...@@ -2544,7 +2615,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) ...@@ -2544,7 +2615,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
*/ */
err = intel_context_pin(ce); err = intel_context_pin(ce);
if (err) if (err)
return err; return ERR_PTR(err);
/* /*
* Take a local wakeref for preparing to dispatch the execbuf as * Take a local wakeref for preparing to dispatch the execbuf as
...@@ -2556,45 +2627,17 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) ...@@ -2556,45 +2627,17 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
*/ */
tl = intel_context_timeline_lock(ce); tl = intel_context_timeline_lock(ce);
if (IS_ERR(tl)) { if (IS_ERR(tl)) {
err = PTR_ERR(tl); intel_context_unpin(ce);
goto err_unpin; return ERR_CAST(tl);
} }
intel_context_enter(ce); intel_context_enter(ce);
rq = eb_throttle(ce); if (throttle)
rq = eb_throttle(eb, ce);
intel_context_timeline_unlock(tl); intel_context_timeline_unlock(tl);
if (rq) { eb->args->flags |= __EXEC_ENGINE_PINNED;
bool nonblock = eb->file->filp->f_flags & O_NONBLOCK; return rq;
long timeout;
timeout = MAX_SCHEDULE_TIMEOUT;
if (nonblock)
timeout = 0;
timeout = i915_request_wait(rq,
I915_WAIT_INTERRUPTIBLE,
timeout);
i915_request_put(rq);
if (timeout < 0) {
err = nonblock ? -EWOULDBLOCK : timeout;
goto err_exit;
}
}
eb->engine = ce->engine;
eb->context = ce;
return 0;
err_exit:
mutex_lock(&tl->mutex);
intel_context_exit(ce);
intel_context_timeline_unlock(tl);
err_unpin:
intel_context_unpin(ce);
return err;
} }
static void eb_unpin_engine(struct i915_execbuffer *eb) static void eb_unpin_engine(struct i915_execbuffer *eb)
...@@ -2602,6 +2645,11 @@ static void eb_unpin_engine(struct i915_execbuffer *eb) ...@@ -2602,6 +2645,11 @@ static void eb_unpin_engine(struct i915_execbuffer *eb)
struct intel_context *ce = eb->context; struct intel_context *ce = eb->context;
struct intel_timeline *tl = ce->timeline; struct intel_timeline *tl = ce->timeline;
if (!(eb->args->flags & __EXEC_ENGINE_PINNED))
return;
eb->args->flags &= ~__EXEC_ENGINE_PINNED;
mutex_lock(&tl->mutex); mutex_lock(&tl->mutex);
intel_context_exit(ce); intel_context_exit(ce);
mutex_unlock(&tl->mutex); mutex_unlock(&tl->mutex);
...@@ -2653,7 +2701,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb) ...@@ -2653,7 +2701,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb)
} }
static int static int
eb_pin_engine(struct i915_execbuffer *eb) eb_select_engine(struct i915_execbuffer *eb)
{ {
struct intel_context *ce; struct intel_context *ce;
unsigned int idx; unsigned int idx;
...@@ -2668,12 +2716,45 @@ eb_pin_engine(struct i915_execbuffer *eb) ...@@ -2668,12 +2716,45 @@ eb_pin_engine(struct i915_execbuffer *eb)
if (IS_ERR(ce)) if (IS_ERR(ce))
return PTR_ERR(ce); return PTR_ERR(ce);
err = __eb_pin_engine(eb, ce); intel_gt_pm_get(ce->engine->gt);
intel_context_put(ce);
if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
err = intel_context_alloc_state(ce);
if (err)
goto err;
}
/*
* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
* EIO if the GPU is already wedged.
*/
err = intel_gt_terminally_wedged(ce->engine->gt);
if (err)
goto err;
eb->context = ce;
eb->engine = ce->engine;
/*
* Make sure engine pool stays alive even if we call intel_context_put
* during ww handling. The pool is destroyed when last pm reference
* is dropped, which breaks our -EDEADLK handling.
*/
return err;
err:
intel_gt_pm_put(ce->engine->gt);
intel_context_put(ce);
return err; return err;
} }
static void
eb_put_engine(struct i915_execbuffer *eb)
{
intel_gt_pm_put(eb->engine->gt);
intel_context_put(eb->context);
}
static void static void
__free_fence_array(struct eb_fence *fences, unsigned int n) __free_fence_array(struct eb_fence *fences, unsigned int n)
{ {
...@@ -3054,6 +3135,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, ...@@ -3054,6 +3135,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1); eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
eb.vma[0].vma = NULL; eb.vma[0].vma = NULL;
eb.reloc_pool = eb.batch_pool = NULL; eb.reloc_pool = eb.batch_pool = NULL;
eb.reloc_context = NULL;
eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
reloc_cache_init(&eb.reloc_cache, eb.i915); reloc_cache_init(&eb.reloc_cache, eb.i915);
...@@ -3122,7 +3204,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, ...@@ -3122,7 +3204,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (unlikely(err)) if (unlikely(err))
goto err_destroy; goto err_destroy;
err = eb_pin_engine(&eb); err = eb_select_engine(&eb);
if (unlikely(err)) if (unlikely(err))
goto err_context; goto err_context;
...@@ -3259,8 +3341,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, ...@@ -3259,8 +3341,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
intel_gt_buffer_pool_put(eb.batch_pool); intel_gt_buffer_pool_put(eb.batch_pool);
if (eb.reloc_pool) if (eb.reloc_pool)
intel_gt_buffer_pool_put(eb.reloc_pool); intel_gt_buffer_pool_put(eb.reloc_pool);
if (eb.reloc_context)
intel_context_put(eb.reloc_context);
err_engine: err_engine:
eb_unpin_engine(&eb); eb_put_engine(&eb);
err_context: err_context:
i915_gem_context_put(eb.gem_context); i915_gem_context_put(eb.gem_context);
err_destroy: err_destroy:
......
...@@ -135,6 +135,7 @@ static int igt_gpu_reloc(void *arg) ...@@ -135,6 +135,7 @@ static int igt_gpu_reloc(void *arg)
goto err_pm; goto err_pm;
} }
eb.reloc_pool = NULL; eb.reloc_pool = NULL;
eb.reloc_context = NULL;
i915_gem_ww_ctx_init(&eb.ww, false); i915_gem_ww_ctx_init(&eb.ww, false);
retry: retry:
...@@ -153,6 +154,8 @@ static int igt_gpu_reloc(void *arg) ...@@ -153,6 +154,8 @@ static int igt_gpu_reloc(void *arg)
if (eb.reloc_pool) if (eb.reloc_pool)
intel_gt_buffer_pool_put(eb.reloc_pool); intel_gt_buffer_pool_put(eb.reloc_pool);
if (eb.reloc_context)
intel_context_put(eb.reloc_context);
intel_context_put(eb.context); intel_context_put(eb.context);
err_pm: err_pm:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment