Commit 0e97fbb0 authored by Chris Wilson

drm/i915/gem: Use a single chained reloc batch for a single execbuf

As we can now keep chaining together a relocation batch to process any
number of relocations, we can keep building that relocation batch for
all of the target vmas. This avoids emitting a new request into the
ring for each target, which would consume precious ring space and risk
a stall.

v2: Propagate the failure from submitting the relocation batch.

Testcase: igt/gem_exec_reloc/basic-wide-active
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200501192945.22215-2-chris@chris-wilson.co.uk
parent 964a9b0f
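
The gist of the change, as a rough stand-alone C sketch (not the i915 code
itself; struct reloc_writer, move_target_to_gpu() and emit_entry() are
hypothetical stand-ins): a single relocation batch is reused for every
target vma, the per-target setup only runs when the target changes, and a
full page is chained rather than submitted as a fresh request.

/*
 * Illustrative sketch only; the names below are hypothetical, not the
 * i915 implementation.
 */
#include <stddef.h>
#include <stdio.h>

struct reloc_writer {
	const void *target;	/* last vma prepared for the GPU */
	size_t used;		/* dwords emitted into the current page */
	size_t limit;		/* dwords available before chaining */
	int chains;		/* times we chained to a fresh page */
};

/* Stand-in for serialising the target object with the relocation batch. */
static int move_target_to_gpu(struct reloc_writer *w, const void *vma)
{
	w->target = vma;
	return 0;
}

/* Emit one relocation entry; chain to a new page when the current one fills. */
static int emit_entry(struct reloc_writer *w, const void *vma)
{
	int err;

	if (vma != w->target) {		/* per-target work only when it changes */
		err = move_target_to_gpu(w, vma);
		if (err)
			return err;
	}

	if (w->used + 4 > w->limit) {	/* chain rather than submit a new request */
		w->chains++;
		w->used = 0;
	}

	w->used += 4;			/* a relocation entry is a few dwords */
	return 0;
}

int main(void)
{
	struct reloc_writer w = { .limit = 1024 };
	const char *vmas[] = { "A", "B", "C" };

	/* Relocations are walked per target vma, as eb_relocate() does. */
	for (int v = 0; v < 3; v++)
		for (int i = 0; i < 1000; i++)
			if (emit_entry(&w, vmas[v]))
				return 1;

	printf("targets: 3, pages used: %d, requests submitted: 1\n",
	       w.chains + 1);
	return 0;
}

However many targets and relocations are processed, only one request is
submitted; the flush of that single batch is deferred to the end of
eb_relocate(), as the diff below shows.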
@@ -268,6 +268,7 @@ struct i915_execbuffer {
 		bool has_fence : 1;
 		bool needs_unfenced : 1;
 
+		struct i915_vma *target;
 		struct i915_request *rq;
 		struct i915_vma *rq_vma;
 		u32 *rq_cmd;
@@ -1051,14 +1052,14 @@ static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
 	return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE;
 }
 
-static void reloc_gpu_flush(struct reloc_cache *cache)
+static int reloc_gpu_flush(struct reloc_cache *cache)
 {
 	struct i915_request *rq;
 	int err;
 
 	rq = fetch_and_zero(&cache->rq);
 	if (!rq)
-		return;
+		return 0;
 
 	if (cache->rq_vma) {
 		struct drm_i915_gem_object *obj = cache->rq_vma->obj;
@@ -1084,15 +1085,14 @@ static void reloc_gpu_flush(struct reloc_cache *cache)
 	intel_gt_chipset_flush(rq->engine->gt);
 	i915_request_add(rq);
+
+	return err;
 }
 
 static void reloc_cache_reset(struct reloc_cache *cache)
 {
 	void *vaddr;
 
-	if (cache->rq)
-		reloc_gpu_flush(cache);
-
 	if (!cache->vaddr)
 		return;
 
@@ -1285,7 +1285,6 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
 }
 
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
-			     struct i915_vma *vma,
 			     unsigned int len)
 {
 	struct reloc_cache *cache = &eb->reloc_cache;
@@ -1308,7 +1307,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 		goto out_pool;
 	}
 
-	batch = i915_vma_instance(pool->obj, vma->vm, NULL);
+	batch = i915_vma_instance(pool->obj, eb->context->vm, NULL);
 	if (IS_ERR(batch)) {
 		err = PTR_ERR(batch);
 		goto err_unmap;
@@ -1328,10 +1327,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (err)
 		goto err_request;
 
-	err = reloc_move_to_gpu(rq, vma);
-	if (err)
-		goto err_request;
-
 	i915_vma_lock(batch);
 	err = i915_request_await_object(rq, batch->obj, false);
 	if (err == 0)
@@ -1376,11 +1371,21 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
 		if (!intel_engine_can_store_dword(eb->engine))
 			return ERR_PTR(-ENODEV);
 
-		err = __reloc_gpu_alloc(eb, vma, len);
+		err = __reloc_gpu_alloc(eb, len);
 		if (unlikely(err))
 			return ERR_PTR(err);
 	}
 
+	if (vma != cache->target) {
+		err = reloc_move_to_gpu(cache->rq, vma);
+		if (unlikely(err)) {
+			i915_request_set_error_once(cache->rq, err);
+			return ERR_PTR(err);
+		}
+
+		cache->target = vma;
+	}
+
 	if (unlikely(cache->rq_size + len >
 		     PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
 		err = reloc_gpu_chain(cache);
@@ -1692,15 +1697,20 @@ static int eb_relocate(struct i915_execbuffer *eb)
 	/* The objects are in their final locations, apply the relocations. */
 	if (eb->args->flags & __EXEC_HAS_RELOC) {
 		struct eb_vma *ev;
+		int flush;
 
 		list_for_each_entry(ev, &eb->relocs, reloc_link) {
 			err = eb_relocate_vma(eb, ev);
 			if (err)
-				return err;
+				break;
 		}
+
+		flush = reloc_gpu_flush(&eb->reloc_cache);
+		if (!err)
+			err = flush;
 	}
 
-	return 0;
+	return err;
 }
 
 static int eb_move_to_gpu(struct i915_execbuffer *eb)