Commit ffb0c600 authored by Chris Wilson

drm/i915: Reorder await_execution before await_request

Reorder the code so that we can reuse the await_execution from a special
case in await_request in the next patch.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200526090753.11329-1-chris@chris-wilson.co.uk
parent 22da5d84
...@@ -1058,37 +1058,91 @@ emit_semaphore_wait(struct i915_request *to, ...@@ -1058,37 +1058,91 @@ emit_semaphore_wait(struct i915_request *to,
I915_FENCE_GFP); I915_FENCE_GFP);
} }
static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
struct dma_fence *fence)
{
return __intel_timeline_sync_is_later(tl,
fence->context,
fence->seqno - 1);
}
static int intel_timeline_sync_set_start(struct intel_timeline *tl,
const struct dma_fence *fence)
{
return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
}
static int static int
i915_request_await_request(struct i915_request *to, struct i915_request *from) __i915_request_await_execution(struct i915_request *to,
struct i915_request *from,
void (*hook)(struct i915_request *rq,
struct dma_fence *signal))
{ {
int ret; int err;
GEM_BUG_ON(to == from); GEM_BUG_ON(intel_context_is_barrier(from->context));
GEM_BUG_ON(to->timeline == from->timeline);
if (i915_request_completed(from)) { /* Submit both requests at the same time */
i915_sw_fence_set_error_once(&to->submit, from->fence.error); err = __await_execution(to, from, hook, I915_FENCE_GFP);
if (err)
return err;
/* Squash repeated dependencies to the same timelines */
if (intel_timeline_sync_has_start(i915_request_timeline(to),
&from->fence))
return 0; return 0;
/*
* Wait until the start of this request.
*
* The execution cb fires when we submit the request to HW. But in
* many cases this may be long before the request itself is ready to
* run (consider that we submit 2 requests for the same context, where
* the request of interest is behind an indefinite spinner). So we hook
* up to both to reduce our queues and keep the execution lag minimised
* in the worst case, though we hope that the await_start is elided.
*/
err = i915_request_await_start(to, from);
if (err < 0)
return err;
/*
* Ensure both start together [after all semaphores in signal]
*
* Now that we are queued to the HW at roughly the same time (thanks
* to the execute cb) and are ready to run at roughly the same time
* (thanks to the await start), our signaler may still be indefinitely
* delayed by waiting on a semaphore from a remote engine. If our
* signaler depends on a semaphore, so indirectly do we, and we do not
* want to start our payload until our signaler also starts theirs.
* So we wait.
*
* However, there is also a second condition for which we need to wait
* for the precise start of the signaler. Consider that the signaler
* was submitted in a chain of requests following another context
* (with just an ordinary intra-engine fence dependency between the
* two). In this case the signaler is queued to HW, but not for
* immediate execution, and so we must wait until it reaches the
* active slot.
*/
if (intel_engine_has_semaphores(to->engine) &&
!i915_request_has_initial_breadcrumb(to)) {
err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
if (err < 0)
return err;
} }
/* Couple the dependency tree for PI on this exposed to->fence */
if (to->engine->schedule) { if (to->engine->schedule) {
ret = i915_sched_node_add_dependency(&to->sched, err = i915_sched_node_add_dependency(&to->sched,
&from->sched, &from->sched,
I915_DEPENDENCY_EXTERNAL); I915_DEPENDENCY_WEAK);
if (ret < 0) if (err < 0)
return ret; return err;
} }
if (to->engine == from->engine) return intel_timeline_sync_set_start(i915_request_timeline(to),
ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, &from->fence);
&from->submit,
I915_FENCE_GFP);
else
ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
if (ret < 0)
return ret;
return 0;
} }
static void mark_external(struct i915_request *rq) static void mark_external(struct i915_request *rq)
...@@ -1141,23 +1195,20 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) ...@@ -1141,23 +1195,20 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
} }
int int
i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) i915_request_await_execution(struct i915_request *rq,
struct dma_fence *fence,
void (*hook)(struct i915_request *rq,
struct dma_fence *signal))
{ {
struct dma_fence **child = &fence; struct dma_fence **child = &fence;
unsigned int nchild = 1; unsigned int nchild = 1;
int ret; int ret;
/*
* Note that if the fence-array was created in signal-on-any mode,
* we should *not* decompose it into its individual fences. However,
* we don't currently store which mode the fence-array is operating
* in. Fortunately, the only user of signal-on-any is private to
* amdgpu and we should not see any incoming fence-array from
* sync-file being in signal-on-any mode.
*/
if (dma_fence_is_array(fence)) { if (dma_fence_is_array(fence)) {
struct dma_fence_array *array = to_dma_fence_array(fence); struct dma_fence_array *array = to_dma_fence_array(fence);
/* XXX Error for signal-on-any fence arrays */
child = array->fences; child = array->fences;
nchild = array->num_fences; nchild = array->num_fences;
GEM_BUG_ON(!nchild); GEM_BUG_ON(!nchild);
...@@ -1170,138 +1221,78 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) ...@@ -1170,138 +1221,78 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
continue; continue;
} }
/*
* Requests on the same timeline are explicitly ordered, along
* with their dependencies, by i915_request_add() which ensures
* that requests are submitted in-order through each ring.
*/
if (fence->context == rq->fence.context) if (fence->context == rq->fence.context)
continue; continue;
/* Squash repeated waits to the same timelines */ /*
if (fence->context && * We don't squash repeated fence dependencies here as we
intel_timeline_sync_is_later(i915_request_timeline(rq), * want to run our callback in all cases.
fence)) */
continue;
if (dma_fence_is_i915(fence)) if (dma_fence_is_i915(fence))
ret = i915_request_await_request(rq, to_request(fence)); ret = __i915_request_await_execution(rq,
to_request(fence),
hook);
else else
ret = i915_request_await_external(rq, fence); ret = i915_request_await_external(rq, fence);
if (ret < 0) if (ret < 0)
return ret; return ret;
/* Record the latest fence used against each timeline */
if (fence->context)
intel_timeline_sync_set(i915_request_timeline(rq),
fence);
} while (--nchild); } while (--nchild);
return 0; return 0;
} }
static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
struct dma_fence *fence)
{
return __intel_timeline_sync_is_later(tl,
fence->context,
fence->seqno - 1);
}
static int intel_timeline_sync_set_start(struct intel_timeline *tl,
const struct dma_fence *fence)
{
return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
}
static int static int
__i915_request_await_execution(struct i915_request *to, i915_request_await_request(struct i915_request *to, struct i915_request *from)
struct i915_request *from,
void (*hook)(struct i915_request *rq,
struct dma_fence *signal))
{ {
int err; int ret;
GEM_BUG_ON(intel_context_is_barrier(from->context));
/* Submit both requests at the same time */ GEM_BUG_ON(to == from);
err = __await_execution(to, from, hook, I915_FENCE_GFP); GEM_BUG_ON(to->timeline == from->timeline);
if (err)
return err;
/* Squash repeated dependencies to the same timelines */ if (i915_request_completed(from)) {
if (intel_timeline_sync_has_start(i915_request_timeline(to), i915_sw_fence_set_error_once(&to->submit, from->fence.error);
&from->fence))
return 0; return 0;
/*
* Wait until the start of this request.
*
* The execution cb fires when we submit the request to HW. But in
* many cases this may be long before the request itself is ready to
* run (consider that we submit 2 requests for the same context, where
* the request of interest is behind an indefinite spinner). So we hook
* up to both to reduce our queues and keep the execution lag minimised
* in the worst case, though we hope that the await_start is elided.
*/
err = i915_request_await_start(to, from);
if (err < 0)
return err;
/*
* Ensure both start together [after all semaphores in signal]
*
* Now that we are queued to the HW at roughly the same time (thanks
* to the execute cb) and are ready to run at roughly the same time
* (thanks to the await start), our signaler may still be indefinitely
* delayed by waiting on a semaphore from a remote engine. If our
* signaler depends on a semaphore, so indirectly do we, and we do not
* want to start our payload until our signaler also starts theirs.
* So we wait.
*
* However, there is also a second condition for which we need to wait
* for the precise start of the signaler. Consider that the signaler
* was submitted in a chain of requests following another context
* (with just an ordinary intra-engine fence dependency between the
* two). In this case the signaler is queued to HW, but not for
* immediate execution, and so we must wait until it reaches the
* active slot.
*/
if (intel_engine_has_semaphores(to->engine) &&
!i915_request_has_initial_breadcrumb(to)) {
err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
if (err < 0)
return err;
} }
/* Couple the dependency tree for PI on this exposed to->fence */
if (to->engine->schedule) { if (to->engine->schedule) {
err = i915_sched_node_add_dependency(&to->sched, ret = i915_sched_node_add_dependency(&to->sched,
&from->sched, &from->sched,
I915_DEPENDENCY_WEAK); I915_DEPENDENCY_EXTERNAL);
if (err < 0) if (ret < 0)
return err; return ret;
} }
return intel_timeline_sync_set_start(i915_request_timeline(to), if (to->engine == READ_ONCE(from->engine))
&from->fence); ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
&from->submit,
I915_FENCE_GFP);
else
ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
if (ret < 0)
return ret;
return 0;
} }
int int
i915_request_await_execution(struct i915_request *rq, i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
struct dma_fence *fence,
void (*hook)(struct i915_request *rq,
struct dma_fence *signal))
{ {
struct dma_fence **child = &fence; struct dma_fence **child = &fence;
unsigned int nchild = 1; unsigned int nchild = 1;
int ret; int ret;
/*
* Note that if the fence-array was created in signal-on-any mode,
* we should *not* decompose it into its individual fences. However,
* we don't currently store which mode the fence-array is operating
* in. Fortunately, the only user of signal-on-any is private to
* amdgpu and we should not see any incoming fence-array from
* sync-file being in signal-on-any mode.
*/
if (dma_fence_is_array(fence)) { if (dma_fence_is_array(fence)) {
struct dma_fence_array *array = to_dma_fence_array(fence); struct dma_fence_array *array = to_dma_fence_array(fence);
/* XXX Error for signal-on-any fence arrays */
child = array->fences; child = array->fences;
nchild = array->num_fences; nchild = array->num_fences;
GEM_BUG_ON(!nchild); GEM_BUG_ON(!nchild);
...@@ -1314,22 +1305,31 @@ i915_request_await_execution(struct i915_request *rq, ...@@ -1314,22 +1305,31 @@ i915_request_await_execution(struct i915_request *rq,
continue; continue;
} }
/*
* Requests on the same timeline are explicitly ordered, along
* with their dependencies, by i915_request_add() which ensures
* that requests are submitted in-order through each ring.
*/
if (fence->context == rq->fence.context) if (fence->context == rq->fence.context)
continue; continue;
/* /* Squash repeated waits to the same timelines */
* We don't squash repeated fence dependencies here as we if (fence->context &&
* want to run our callback in all cases. intel_timeline_sync_is_later(i915_request_timeline(rq),
*/ fence))
continue;
if (dma_fence_is_i915(fence)) if (dma_fence_is_i915(fence))
ret = __i915_request_await_execution(rq, ret = i915_request_await_request(rq, to_request(fence));
to_request(fence),
hook);
else else
ret = i915_request_await_external(rq, fence); ret = i915_request_await_external(rq, fence);
if (ret < 0) if (ret < 0)
return ret; return ret;
/* Record the latest fence used against each timeline */
if (fence->context)
intel_timeline_sync_set(i915_request_timeline(rq),
fence);
} while (--nchild); } while (--nchild);
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment