drm/i915: Push the wakeref->count deferral to the backend

If the backend wishes to defer the wakeref parking, make it responsible for unlocking the wakeref (i.e. bumping the counter). This allows it to time the unlock much more carefully in case it happens to needs the wakeref to be active during its deferral. For instance, during engine parking we may choose to emit an idle barrier (a request). To do so, we borrow the engine->kernel_context timeline and to ensure exclusive access we keep the engine->wakeref.count as 0. However, to submit that request to HW may require a intel_engine_pm_get() (e.g. to keep the submission tasklet alive) and before we allow that we have to rewake our wakeref to avoid a recursive deadlock. <4> [257.742916] IRQs not enabled as expected <4> [257.742930] WARNING: CPU: 0 PID: 0 at kernel/softirq.c:169 __local_bh_enable_ip+0xa9/0x100 <4> [257.742936] Modules linked in: vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic i915 btusb btrtl btbcm btintel snd_hda_intel snd_intel_nhlt bluetooth snd_hda_codec coretemp snd_hwdep crct10dif_pclmul snd_hda_core crc32_pclmul ecdh_generic ecc ghash_clmulni_intel snd_pcm r8169 realtek lpc_ich prime_numbers i2c_hid <4> [257.742991] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G U W 5.3.0-rc3-g5d0a06cd532c-drmtip_340+ #1 <4> [257.742998] Hardware name: GIGABYTE GB-BXBT-1900/MZBAYAB-00, BIOS F6 02/17/2015 <4> [257.743008] RIP: 0010:__local_bh_enable_ip+0xa9/0x100 <4> [257.743017] Code: 37 5b 5d c3 8b 80 50 08 00 00 85 c0 75 a9 80 3d 0b be 25 01 00 75 a0 48 c7 c7 f3 0c 06 ac c6 05 fb bd 25 01 01 e8 77 84 ff ff <0f> 0b eb 89 48 89 ef e8 3b 41 06 00 eb 98 e8 e4 5c f4 ff 5b 5d c3 <4> [257.743025] RSP: 0018:ffffa78600003cb8 EFLAGS: 00010086 <4> [257.743035] RAX: 0000000000000000 RBX: 0000000000000200 RCX: 0000000000010302 <4> [257.743042] RDX: 0000000080010302 RSI: 0000000000000000 RDI: 00000000ffffffff <4> [257.743050] RBP: ffffffffc0494bb3 R08: 0000000000000000 R09: 0000000000000001 <4> [257.743058] R10: 0000000014c8f0e9 R11: 00000000fee2ff8e R12: ffffa23ba8c38008 <4> [257.743065] R13: ffffa23bacc579c0 R14: ffffa23bb7db0f60 R15: ffffa23b9cc8c430 <4> [257.743074] FS: 0000000000000000(0000) GS:ffffa23bbba00000(0000) knlGS:0000000000000000 <4> [257.743082] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [257.743089] CR2: 00007fe477b20778 CR3: 000000011f72a000 CR4: 00000000001006f0 <4> [257.743096] Call Trace: <4> [257.743104] <IRQ> <4> [257.743265] __i915_request_commit+0x240/0x5d0 [i915] <4> [257.743427] ? __i915_request_create+0x228/0x4c0 [i915] <4> [257.743584] __engine_park+0x64/0x250 [i915] <4> [257.743730] ____intel_wakeref_put_last+0x1c/0x70 [i915] <4> [257.743878] i915_sample+0x2ee/0x310 [i915] <4> [257.744030] ? i915_pmu_cpu_offline+0xb0/0xb0 [i915] <4> [257.744040] __hrtimer_run_queues+0x11e/0x4b0 <4> [257.744068] hrtimer_interrupt+0xea/0x250 <4> [257.744079] ? lockdep_hardirqs_off+0x79/0xd0 <4> [257.744101] smp_apic_timer_interrupt+0x96/0x280 <4> [257.744114] apic_timer_interrupt+0xf/0x20 <4> [257.744125] RIP: 0010:__do_softirq+0xb3/0x4ae v2: Keep the priority_hint assert v3: That assert was desperately trying to point out my bug. Sorry, little assert. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111378Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190813190705.23869-1-chris@chris-wilson.co.uk

drm/i915: Push the wakeref->count deferral to the backend
If the backend wishes to defer the wakeref parking, make it responsible for unlocking the wakeref (i.e. bumping the counter). This allows it to time the unlock much more carefully in case it happens to needs the wakeref to be active during its deferral. For instance, during engine parking we may choose to emit an idle barrier (a request). To do so, we borrow the engine->kernel_context timeline and to ensure exclusive access we keep the engine->wakeref.count as 0. However, to submit that request to HW may require a intel_engine_pm_get() (e.g. to keep the submission tasklet alive) and before we allow that we have to rewake our wakeref to avoid a recursive deadlock. <4> [257.742916] IRQs not enabled as expected <4> [257.742930] WARNING: CPU: 0 PID: 0 at kernel/softirq.c:169 __local_bh_enable_ip+0xa9/0x100 <4> [257.742936] Modules linked in: vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic i915 btusb btrtl btbcm btintel snd_hda_intel snd_intel_nhlt bluetooth snd_hda_codec coretemp snd_hwdep crct10dif_pclmul snd_hda_core crc32_pclmul ecdh_generic ecc ghash_clmulni_intel snd_pcm r8169 realtek lpc_ich prime_numbers i2c_hid <4> [257.742991] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G U W 5.3.0-rc3-g5d0a06cd532c-drmtip_340+ #1 <4> [257.742998] Hardware name: GIGABYTE GB-BXBT-1900/MZBAYAB-00, BIOS F6 02/17/2015 <4> [257.743008] RIP: 0010:__local_bh_enable_ip+0xa9/0x100 <4> [257.743017] Code: 37 5b 5d c3 8b 80 50 08 00 00 85 c0 75 a9 80 3d 0b be 25 01 00 75 a0 48 c7 c7 f3 0c 06 ac c6 05 fb bd 25 01 01 e8 77 84 ff ff <0f> 0b eb 89 48 89 ef e8 3b 41 06 00 eb 98 e8 e4 5c f4 ff 5b 5d c3 <4> [257.743025] RSP: 0018:ffffa78600003cb8 EFLAGS: 00010086 <4> [257.743035] RAX: 0000000000000000 RBX: 0000000000000200 RCX: 0000000000010302 <4> [257.743042] RDX: 0000000080010302 RSI: 0000000000000000 RDI: 00000000ffffffff <4> [257.743050] RBP: ffffffffc0494bb3 R08: 0000000000000000 R09: 0000000000000001 <4> [257.743058] R10: 0000000014c8f0e9 R11: 00000000fee2ff8e R12: ffffa23ba8c38008 <4> [257.743065] R13: ffffa23bacc579c0 R14: ffffa23bb7db0f60 R15: ffffa23b9cc8c430 <4> [257.743074] FS: 0000000000000000(0000) GS:ffffa23bbba00000(0000) knlGS:0000000000000000 <4> [257.743082] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [257.743089] CR2: 00007fe477b20778 CR3: 000000011f72a000 CR4: 00000000001006f0 <4> [257.743096] Call Trace: <4> [257.743104] <IRQ> <4> [257.743265] __i915_request_commit+0x240/0x5d0 [i915] <4> [257.743427] ? __i915_request_create+0x228/0x4c0 [i915] <4> [257.743584] __engine_park+0x64/0x250 [i915] <4> [257.743730] ____intel_wakeref_put_last+0x1c/0x70 [i915] <4> [257.743878] i915_sample+0x2ee/0x310 [i915] <4> [257.744030] ? i915_pmu_cpu_offline+0xb0/0xb0 [i915] <4> [257.744040] __hrtimer_run_queues+0x11e/0x4b0 <4> [257.744068] hrtimer_interrupt+0xea/0x250 <4> [257.744079] ? lockdep_hardirqs_off+0x79/0xd0 <4> [257.744101] smp_apic_timer_interrupt+0x96/0x280 <4> [257.744114] apic_timer_interrupt+0xf/0x20 <4> [257.744125] RIP: 0010:__do_softirq+0xb3/0x4ae v2: Keep the priority_hint assert v3: That assert was desperately trying to point out my bug. Sorry, little assert. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111378Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190813190705.23869-1-chris@chris-wilson.co.uk
a79ca656 · Chris Wilson · 1cdd8705 · a79ca656 · a79ca656 · a79ca656
Commit a79ca656 authored Aug 13, 2019 by Chris Wilson
7 changed files
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -68,10 +68,16 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)

 	/* Check again on the next retirement. */
 	engine->wakeref_serial = engine->serial + 1;
-
 	i915_request_add_active_barriers(rq);
+
+	/* Install ourselves as a preemption barrier */
+	rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE;
 	__i915_request_commit(rq);

+	/* Release our exclusive hold on the engine */
+	__intel_wakeref_defer_park(&engine->wakeref);
+	__i915_request_queue(rq, NULL);
+
 	return false;
 }


--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -16,18 +16,6 @@ enum {
 	I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1,
 	I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY,
 	I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1,
-
-	/*
-	 * Requests containing performance queries must not be preempted by
-	 * another context. They get scheduled with their default priority and
-	 * once they reach the execlist ports we ensure that they stick on the
-	 * HW until finished by pretending that they have maximum priority,
-	 * i.e. nothing can have higher priority and force us to usurp the
-	 * active request.
-	 */
-	I915_PRIORITY_UNPREEMPTABLE = INT_MAX,
-
-	I915_PRIORITY_INVALID = INT_MIN
 };

 #define I915_USER_PRIORITY_SHIFT 2
@@ -39,6 +27,19 @@ enum {
 #define I915_PRIORITY_WAIT		((u8)BIT(0))
 #define I915_PRIORITY_NOSEMAPHORE	((u8)BIT(1))

+/* Smallest priority value that cannot be bumped. */
+#define I915_PRIORITY_INVALID (INT_MIN | (u8)I915_PRIORITY_MASK)
+
+/*
+ * Requests containing performance queries must not be preempted by
+ * another context. They get scheduled with their default priority and
+ * once they reach the execlist ports we ensure that they stick on the
+ * HW until finished by pretending that they have maximum priority,
+ * i.e. nothing can have higher priority and force us to usurp the
+ * active request.
+ */
+#define I915_PRIORITY_UNPREEMPTABLE INT_MAX
+
 #define __NO_PREEMPTION (I915_PRIORITY_WAIT)

 struct i915_priolist {

--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1186,6 +1186,12 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 		list_add(&ring->active_link, &rq->i915->gt.active_rings);
 	rq->emitted_jiffies = jiffies;

+	return prev;
+}
+
+void __i915_request_queue(struct i915_request *rq,
+			  const struct i915_sched_attr *attr)
+{
 	/*
 	 * Let the backend know a new request has arrived that may need
 	 * to adjust the existing execution schedule due to a high priority
@@ -1199,43 +1205,15 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 	 */
 	local_bh_disable();
 	i915_sw_fence_commit(&rq->semaphore);
-	if (engine->schedule) {
-		struct i915_sched_attr attr = rq->gem_context->sched;
-
-		/*
-		 * Boost actual workloads past semaphores!
-		 *
-		 * With semaphores we spin on one engine waiting for another,
-		 * simply to reduce the latency of starting our work when
-		 * the signaler completes. However, if there is any other
-		 * work that we could be doing on this engine instead, that
-		 * is better utilisation and will reduce the overall duration
-		 * of the current work. To avoid PI boosting a semaphore
-		 * far in the distance past over useful work, we keep a history
-		 * of any semaphore use along our dependency chain.
-		 */
-		if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
-			attr.priority |= I915_PRIORITY_NOSEMAPHORE;
-
-		/*
-		 * Boost priorities to new clients (new request flows).
-		 *
-		 * Allow interactive/synchronous clients to jump ahead of
-		 * the bulk clients. (FQ_CODEL)
-		 */
-		if (list_empty(&rq->sched.signalers_list))
-			attr.priority |= I915_PRIORITY_WAIT;
-
-		engine->schedule(rq, &attr);
-	}
+	if (attr && rq->engine->schedule)
+		rq->engine->schedule(rq, attr);
 	i915_sw_fence_commit(&rq->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
-
-	return prev;
 }

 void i915_request_add(struct i915_request *rq)
 {
+	struct i915_sched_attr attr = rq->gem_context->sched;
 	struct i915_request *prev;

 	lockdep_assert_held(&rq->timeline->mutex);
@@ -1245,6 +1223,32 @@ void i915_request_add(struct i915_request *rq)

 	prev = __i915_request_commit(rq);

+	/*
+	 * Boost actual workloads past semaphores!
+	 *
+	 * With semaphores we spin on one engine waiting for another,
+	 * simply to reduce the latency of starting our work when
+	 * the signaler completes. However, if there is any other
+	 * work that we could be doing on this engine instead, that
+	 * is better utilisation and will reduce the overall duration
+	 * of the current work. To avoid PI boosting a semaphore
+	 * far in the distance past over useful work, we keep a history
+	 * of any semaphore use along our dependency chain.
+	 */
+	if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
+		attr.priority |= I915_PRIORITY_NOSEMAPHORE;
+
+	/*
+	 * Boost priorities to new clients (new request flows).
+	 *
+	 * Allow interactive/synchronous clients to jump ahead of
+	 * the bulk clients. (FQ_CODEL)
+	 */
+	if (list_empty(&rq->sched.signalers_list))
+		attr.priority |= I915_PRIORITY_WAIT;
+
+	__i915_request_queue(rq, &attr);
+
 	/*
 	 * In typical scenarios, we do not expect the previous request on
 	 * the timeline to be still tracked by timeline->last_request if it

--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -251,6 +251,8 @@ struct i915_request * __must_check
 i915_request_create(struct intel_context *ce);

 struct i915_request *__i915_request_commit(struct i915_request *request);
+void __i915_request_queue(struct i915_request *rq,
+			  const struct i915_sched_attr *attr);

 void i915_request_retire_upto(struct i915_request *rq);


--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -349,8 +349,7 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
 	unsigned long flags;

 	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
-
-	if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
+	if (READ_ONCE(rq->sched.attr.priority) & bump)
 		return;

 	spin_lock_irqsave(&schedule_lock, flags);

--- a/drivers/gpu/drm/i915/intel_wakeref.c
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -57,12 +57,10 @@ static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
 	if (!atomic_dec_and_test(&wf->count))
 		goto unlock;

+	/* ops->put() must reschedule its own release on error/deferral */
 	if (likely(!wf->ops->put(wf))) {
 		rpm_put(wf);
 		wake_up_var(&wf->wakeref);
-	} else {
-		/* ops->put() must schedule its own release on deferral */
-		atomic_set_release(&wf->count, 1);
 	}

 unlock:

--- a/drivers/gpu/drm/i915/intel_wakeref.h
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -163,6 +163,17 @@ intel_wakeref_is_active(const struct intel_wakeref *wf)
 	return READ_ONCE(wf->wakeref);
 }

+/**
+ * __intel_wakeref_defer_park: Defer the current park callback
+ * @wf: the wakeref
+ */
+static inline void
+__intel_wakeref_defer_park(struct intel_wakeref *wf)
+{
+	INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count));
+	atomic_set_release(&wf->count, 1);
+}
+
 /**
 * intel_wakeref_wait_for_idle: Wait until the wakeref is idle
 * @wf: the wakeref