Commit ffb0c600 authored by Chris Wilson's avatar Chris Wilson

drm/i915: Reorder await_execution before await_request

Reorder the code so that we can reuse the await_execution from a special
case in await_request in the next patch.
Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: default avatarTvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200526090753.11329-1-chris@chris-wilson.co.uk
parent 22da5d84
......@@ -1058,37 +1058,91 @@ emit_semaphore_wait(struct i915_request *to,
I915_FENCE_GFP);
}
static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
struct dma_fence *fence)
{
return __intel_timeline_sync_is_later(tl,
fence->context,
fence->seqno - 1);
}
static int intel_timeline_sync_set_start(struct intel_timeline *tl,
const struct dma_fence *fence)
{
return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
}
static int
i915_request_await_request(struct i915_request *to, struct i915_request *from)
__i915_request_await_execution(struct i915_request *to,
struct i915_request *from,
void (*hook)(struct i915_request *rq,
struct dma_fence *signal))
{
int ret;
int err;
GEM_BUG_ON(to == from);
GEM_BUG_ON(to->timeline == from->timeline);
GEM_BUG_ON(intel_context_is_barrier(from->context));
if (i915_request_completed(from)) {
i915_sw_fence_set_error_once(&to->submit, from->fence.error);
/* Submit both requests at the same time */
err = __await_execution(to, from, hook, I915_FENCE_GFP);
if (err)
return err;
/* Squash repeated depenendices to the same timelines */
if (intel_timeline_sync_has_start(i915_request_timeline(to),
&from->fence))
return 0;
/*
* Wait until the start of this request.
*
* The execution cb fires when we submit the request to HW. But in
* many cases this may be long before the request itself is ready to
* run (consider that we submit 2 requests for the same context, where
* the request of interest is behind an indefinite spinner). So we hook
* up to both to reduce our queues and keep the execution lag minimised
* in the worst case, though we hope that the await_start is elided.
*/
err = i915_request_await_start(to, from);
if (err < 0)
return err;
/*
* Ensure both start together [after all semaphores in signal]
*
* Now that we are queued to the HW at roughly the same time (thanks
* to the execute cb) and are ready to run at roughly the same time
* (thanks to the await start), our signaler may still be indefinitely
* delayed by waiting on a semaphore from a remote engine. If our
* signaler depends on a semaphore, so indirectly do we, and we do not
* want to start our payload until our signaler also starts theirs.
* So we wait.
*
* However, there is also a second condition for which we need to wait
* for the precise start of the signaler. Consider that the signaler
* was submitted in a chain of requests following another context
* (with just an ordinary intra-engine fence dependency between the
* two). In this case the signaler is queued to HW, but not for
* immediate execution, and so we must wait until it reaches the
* active slot.
*/
if (intel_engine_has_semaphores(to->engine) &&
!i915_request_has_initial_breadcrumb(to)) {
err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
if (err < 0)
return err;
}
/* Couple the dependency tree for PI on this exposed to->fence */
if (to->engine->schedule) {
ret = i915_sched_node_add_dependency(&to->sched,
err = i915_sched_node_add_dependency(&to->sched,
&from->sched,
I915_DEPENDENCY_EXTERNAL);
if (ret < 0)
return ret;
I915_DEPENDENCY_WEAK);
if (err < 0)
return err;
}
if (to->engine == from->engine)
ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
&from->submit,
I915_FENCE_GFP);
else
ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
if (ret < 0)
return ret;
return 0;
return intel_timeline_sync_set_start(i915_request_timeline(to),
&from->fence);
}
static void mark_external(struct i915_request *rq)
......@@ -1141,23 +1195,20 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
}
int
i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
i915_request_await_execution(struct i915_request *rq,
struct dma_fence *fence,
void (*hook)(struct i915_request *rq,
struct dma_fence *signal))
{
struct dma_fence **child = &fence;
unsigned int nchild = 1;
int ret;
/*
* Note that if the fence-array was created in signal-on-any mode,
* we should *not* decompose it into its individual fences. However,
* we don't currently store which mode the fence-array is operating
* in. Fortunately, the only user of signal-on-any is private to
* amdgpu and we should not see any incoming fence-array from
* sync-file being in signal-on-any mode.
*/
if (dma_fence_is_array(fence)) {
struct dma_fence_array *array = to_dma_fence_array(fence);
/* XXX Error for signal-on-any fence arrays */
child = array->fences;
nchild = array->num_fences;
GEM_BUG_ON(!nchild);
......@@ -1170,138 +1221,78 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
continue;
}
/*
* Requests on the same timeline are explicitly ordered, along
* with their dependencies, by i915_request_add() which ensures
* that requests are submitted in-order through each ring.
*/
if (fence->context == rq->fence.context)
continue;
/* Squash repeated waits to the same timelines */
if (fence->context &&
intel_timeline_sync_is_later(i915_request_timeline(rq),
fence))
continue;
/*
* We don't squash repeated fence dependencies here as we
* want to run our callback in all cases.
*/
if (dma_fence_is_i915(fence))
ret = i915_request_await_request(rq, to_request(fence));
ret = __i915_request_await_execution(rq,
to_request(fence),
hook);
else
ret = i915_request_await_external(rq, fence);
if (ret < 0)
return ret;
/* Record the latest fence used against each timeline */
if (fence->context)
intel_timeline_sync_set(i915_request_timeline(rq),
fence);
} while (--nchild);
return 0;
}
static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
struct dma_fence *fence)
{
return __intel_timeline_sync_is_later(tl,
fence->context,
fence->seqno - 1);
}
static int intel_timeline_sync_set_start(struct intel_timeline *tl,
const struct dma_fence *fence)
{
return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
}
static int
__i915_request_await_execution(struct i915_request *to,
struct i915_request *from,
void (*hook)(struct i915_request *rq,
struct dma_fence *signal))
i915_request_await_request(struct i915_request *to, struct i915_request *from)
{
int err;
GEM_BUG_ON(intel_context_is_barrier(from->context));
int ret;
/* Submit both requests at the same time */
err = __await_execution(to, from, hook, I915_FENCE_GFP);
if (err)
return err;
GEM_BUG_ON(to == from);
GEM_BUG_ON(to->timeline == from->timeline);
/* Squash repeated depenendices to the same timelines */
if (intel_timeline_sync_has_start(i915_request_timeline(to),
&from->fence))
if (i915_request_completed(from)) {
i915_sw_fence_set_error_once(&to->submit, from->fence.error);
return 0;
/*
* Wait until the start of this request.
*
* The execution cb fires when we submit the request to HW. But in
* many cases this may be long before the request itself is ready to
* run (consider that we submit 2 requests for the same context, where
* the request of interest is behind an indefinite spinner). So we hook
* up to both to reduce our queues and keep the execution lag minimised
* in the worst case, though we hope that the await_start is elided.
*/
err = i915_request_await_start(to, from);
if (err < 0)
return err;
/*
* Ensure both start together [after all semaphores in signal]
*
* Now that we are queued to the HW at roughly the same time (thanks
* to the execute cb) and are ready to run at roughly the same time
* (thanks to the await start), our signaler may still be indefinitely
* delayed by waiting on a semaphore from a remote engine. If our
* signaler depends on a semaphore, so indirectly do we, and we do not
* want to start our payload until our signaler also starts theirs.
* So we wait.
*
* However, there is also a second condition for which we need to wait
* for the precise start of the signaler. Consider that the signaler
* was submitted in a chain of requests following another context
* (with just an ordinary intra-engine fence dependency between the
* two). In this case the signaler is queued to HW, but not for
* immediate execution, and so we must wait until it reaches the
* active slot.
*/
if (intel_engine_has_semaphores(to->engine) &&
!i915_request_has_initial_breadcrumb(to)) {
err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
if (err < 0)
return err;
}
/* Couple the dependency tree for PI on this exposed to->fence */
if (to->engine->schedule) {
err = i915_sched_node_add_dependency(&to->sched,
ret = i915_sched_node_add_dependency(&to->sched,
&from->sched,
I915_DEPENDENCY_WEAK);
if (err < 0)
return err;
I915_DEPENDENCY_EXTERNAL);
if (ret < 0)
return ret;
}
return intel_timeline_sync_set_start(i915_request_timeline(to),
&from->fence);
if (to->engine == READ_ONCE(from->engine))
ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
&from->submit,
I915_FENCE_GFP);
else
ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
if (ret < 0)
return ret;
return 0;
}
int
i915_request_await_execution(struct i915_request *rq,
struct dma_fence *fence,
void (*hook)(struct i915_request *rq,
struct dma_fence *signal))
i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
{
struct dma_fence **child = &fence;
unsigned int nchild = 1;
int ret;
/*
* Note that if the fence-array was created in signal-on-any mode,
* we should *not* decompose it into its individual fences. However,
* we don't currently store which mode the fence-array is operating
* in. Fortunately, the only user of signal-on-any is private to
* amdgpu and we should not see any incoming fence-array from
* sync-file being in signal-on-any mode.
*/
if (dma_fence_is_array(fence)) {
struct dma_fence_array *array = to_dma_fence_array(fence);
/* XXX Error for signal-on-any fence arrays */
child = array->fences;
nchild = array->num_fences;
GEM_BUG_ON(!nchild);
......@@ -1314,22 +1305,31 @@ i915_request_await_execution(struct i915_request *rq,
continue;
}
/*
* Requests on the same timeline are explicitly ordered, along
* with their dependencies, by i915_request_add() which ensures
* that requests are submitted in-order through each ring.
*/
if (fence->context == rq->fence.context)
continue;
/*
* We don't squash repeated fence dependencies here as we
* want to run our callback in all cases.
*/
/* Squash repeated waits to the same timelines */
if (fence->context &&
intel_timeline_sync_is_later(i915_request_timeline(rq),
fence))
continue;
if (dma_fence_is_i915(fence))
ret = __i915_request_await_execution(rq,
to_request(fence),
hook);
ret = i915_request_await_request(rq, to_request(fence));
else
ret = i915_request_await_external(rq, fence);
if (ret < 0)
return ret;
/* Record the latest fence used against each timeline */
if (fence->context)
intel_timeline_sync_set(i915_request_timeline(rq),
fence);
} while (--nchild);
return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment