Commit b7404c7e authored by Chris Wilson

drm/i915: Bump ready tasks ahead of busywaits

Consider two tasks that are running in parallel on a pair of engines
(vcs0, vcs1), but then must complete on a shared engine (rcs0). To
maximise throughput, we want to run the first ready task on rcs0 (i.e.
the first task that completes on either of vcs0 or vcs1). When using
semaphores, however, we will instead queue onto rcs0 in submission order.

To resolve this incorrect ordering, we want to re-evaluate the priority
queue as each of the requests becomes ready. Normally this happens because
we only insert requests into the priority queue once they are ready, but
with semaphores we insert ahead of their readiness and compensate by
penalizing those tasks with reduced priority (so that tasks that do not
need to busywait naturally run first). However, given a series of tasks
that each use semaphores, the queue degrades into submission fifo rather
than readiness fifo. To counter this, we give a small boost to semaphore
users as the tasks they depend on complete: at that point they no longer
require any busywait before running, as they are then ready themselves.

v2: Fixup irqsave for schedule_lock (Tvrtko)

Testcase: igt/gem_exec_schedule/semaphore-codependency
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Dmitry Ermilov <dmitry.ermilov@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190409152922.23894-1-chris@chris-wilson.co.uk
parent 9726920b
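
To make the ordering fix above concrete, here is a minimal, self-contained userspace C sketch of the idea (this is not i915 code; every name in it is hypothetical): tasks that will busywait on a semaphore are queued with a priority penalty, and when the work they depend on signals completion they receive a small boost, so the first task to become ready is picked ahead of tasks still waiting, regardless of submission order.

/* Toy illustration only; names are invented and do not exist in i915. */
#include <stdio.h>

enum {
        PRIO_SEMAPHORE_PENALTY = -1,    /* queued before it is ready */
        PRIO_READY_BOOST       = 1,     /* its dependency has signalled */
};

struct task {
        const char *name;
        int prio;       /* effective queue priority */
        int ready;      /* set once the vcs-side dependency completes */
};

/* Pick the highest-priority task; ties fall back to submission order. */
static struct task *pick_next(struct task *q, int n)
{
        struct task *best = NULL;
        for (int i = 0; i < n; i++)
                if (!best || q[i].prio > best->prio)
                        best = &q[i];
        return best;
}

/* Rough analogue of a semaphore-completion callback: bump on readiness. */
static void dependency_signaled(struct task *t)
{
        t->ready = 1;
        t->prio += PRIO_READY_BOOST;
}

int main(void)
{
        /* Two rcs0 tasks, both submitted with the semaphore penalty. */
        struct task q[2] = {
                { "rcs0-task-after-vcs0", PRIO_SEMAPHORE_PENALTY, 0 },
                { "rcs0-task-after-vcs1", PRIO_SEMAPHORE_PENALTY, 0 },
        };

        /* vcs1 finishes first: its dependent is boosted and so runs first. */
        dependency_signaled(&q[1]);

        printf("next on rcs0: %s\n", pick_next(q, 2)->name);
        return 0;
}

In the driver itself, requests that never need to busywait are preferred via the I915_PRIORITY_NOSEMAPHORE bit, while the readiness boost added here is the smaller I915_PRIORITY_NEWCLIENT, applied from the new semaphore_notify() callback via i915_schedule_bump_priority(), as the diff below shows.
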
@@ -552,6 +552,36 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
+static int __i915_sw_fence_call
+semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+	struct i915_request *request =
+		container_of(fence, typeof(*request), semaphore);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		/*
+		 * We only check a small portion of our dependencies
+		 * and so cannot guarantee that there remains no
+		 * semaphore chain across all. Instead of opting
+		 * for the full NOSEMAPHORE boost, we go for the
+		 * smaller (but still preempting) boost of
+		 * NEWCLIENT. This will be enough to boost over
+		 * a busywaiting request (as that cannot be
+		 * NEWCLIENT) without accidentally boosting
+		 * a busywait over real work elsewhere.
+		 */
+		i915_schedule_bump_priority(request, I915_PRIORITY_NEWCLIENT);
+		break;
+
+	case FENCE_FREE:
+		i915_request_put(request);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
 static void ring_retire_requests(struct intel_ring *ring)
 {
 	struct i915_request *rq, *rn;

@@ -702,6 +732,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	/* We bump the ref for the fence chain */
 	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
+	i915_sw_fence_init(&i915_request_get(rq)->semaphore, semaphore_notify);
 
 	i915_sched_node_init(&rq->sched);

@@ -784,6 +815,12 @@ emit_semaphore_wait(struct i915_request *to,
 					     &from->fence, 0,
 					     I915_FENCE_GFP);
 
+	err = i915_sw_fence_await_dma_fence(&to->semaphore,
+					    &from->fence, 0,
+					    I915_FENCE_GFP);
+	if (err < 0)
+		return err;
+
 	/* We need to pin the signaler's HWSP until we are finished reading. */
 	err = i915_timeline_read_hwsp(from, to, &hwsp_offset);
 	if (err)

@@ -1114,6 +1151,7 @@ void i915_request_add(struct i915_request *request)
 	 * run at the earliest possible convenience.
 	 */
 	local_bh_disable();
+	i915_sw_fence_commit(&request->semaphore);
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
 	if (engine->schedule) {
 		struct i915_sched_attr attr = request->gem_context->sched;

@@ -1320,7 +1358,9 @@ long i915_request_wait(struct i915_request *rq,
 	if (flags & I915_WAIT_PRIORITY) {
 		if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
 			gen6_rps_boost(rq);
+		local_bh_disable(); /* suspend tasklets for reprioritisation */
 		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
+		local_bh_enable(); /* kick tasklets en masse */
 	}
 
 	wait.tsk = current;
@@ -143,6 +143,7 @@ struct i915_request {
 		struct i915_sw_dma_fence_cb dmaq;
 	};
 	struct list_head execute_cb;
+	struct i915_sw_fence semaphore;
 
 	/*
 	 * A list of everyone we wait upon, and everyone who waits upon us.
@@ -64,7 +64,7 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 {
 	bool ret = false;
 
-	spin_lock(&schedule_lock);
+	spin_lock_irq(&schedule_lock);
 
 	if (!node_signaled(signal)) {
 		INIT_LIST_HEAD(&dep->dfs_link);

@@ -81,7 +81,7 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 		ret = true;
 	}
 
-	spin_unlock(&schedule_lock);
+	spin_unlock_irq(&schedule_lock);
 
 	return ret;
 }

@@ -108,7 +108,7 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 	GEM_BUG_ON(!list_empty(&node->link));
 
-	spin_lock(&schedule_lock);
+	spin_lock_irq(&schedule_lock);
 
 	/*
 	 * Everyone we depended upon (the fences we wait to be signaled)

@@ -135,7 +135,7 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 		i915_dependency_free(dep);
 	}
 
-	spin_unlock(&schedule_lock);
+	spin_unlock_irq(&schedule_lock);
 }
 
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)

@@ -356,7 +356,7 @@ static void __i915_schedule(struct i915_request *rq,
 	memset(&cache, 0, sizeof(cache));
 	engine = rq->engine;
 
-	spin_lock_irq(&engine->timeline.lock);
+	spin_lock(&engine->timeline.lock);
 
 	/* Fifo and depth-first replacement ensure our deps execute before us */
 	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {

@@ -407,32 +407,33 @@ static void __i915_schedule(struct i915_request *rq,
 		tasklet_hi_schedule(&engine->execlists.tasklet);
 	}
 
-	spin_unlock_irq(&engine->timeline.lock);
+	spin_unlock(&engine->timeline.lock);
 }
 
 void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 {
-	spin_lock(&schedule_lock);
+	spin_lock_irq(&schedule_lock);
 	__i915_schedule(rq, attr);
-	spin_unlock(&schedule_lock);
+	spin_unlock_irq(&schedule_lock);
 }
 
 void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
 {
 	struct i915_sched_attr attr;
+	unsigned long flags;
 
 	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
 
 	if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
 		return;
 
-	spin_lock_bh(&schedule_lock);
+	spin_lock_irqsave(&schedule_lock, flags);
 	attr = rq->sched.attr;
 	attr.priority |= bump;
 	__i915_schedule(rq, &attr);
-	spin_unlock_bh(&schedule_lock);
+	spin_unlock_irqrestore(&schedule_lock, flags);
 }
 
 void __i915_priolist_free(struct i915_priolist *p)