Commit 4adbcff2 authored by Dave Airlie

Merge tag 'drm-intel-fixes-2019-10-10' of...

Merge tag 'drm-intel-fixes-2019-10-10' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixes

- Fix CML display by adding a missing ID.
- Drop redundant list_del_init
- Only enqueue already completed requests to avoid races
- Fixup preempt-to-busy vs reset of a virtual request
- Protect peeking at execlists->active
- execlists->active is serialised by the tasklet

drm-intel-next-fixes-2019-09-19:
- Extend old HSW workaround to fix some GPU hangs on Haswell GT2
- Fix return error code on GEM mmap.
- Whitelist a chicken bit register for push constants legacy mode on Mesa
- Fix resume issue related to GGTT restore
- Remove incorrect BUG_ON on execlist's schedule-out
- Fix unrecoverable GPU hangs with Vulkan compute workloads on SKL

drm-intel-next-fixes-2019-09-26:
- Fix a race in cases where requests were getting retired at the same time as they were resubmitted to HW
- Fix gen9 display resolutions by setting the right max plane width
- Fix GPU hang on preemption
- Mark contents as dirty on a write fault. This was breaking cursor sprite with dumb buffers.
Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191010143039.GA15313@intel.com
parents 46fe219d e137d3ab
...@@ -3280,7 +3280,20 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb, ...@@ -3280,7 +3280,20 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb,
switch (fb->modifier) { switch (fb->modifier) {
case DRM_FORMAT_MOD_LINEAR: case DRM_FORMAT_MOD_LINEAR:
case I915_FORMAT_MOD_X_TILED: case I915_FORMAT_MOD_X_TILED:
return 4096; /*
* Validated limit is 4k, but 5k should
* work apart from the following features:
* - Ytile (already limited to 4k)
* - FP16 (already limited to 4k)
* - render compression (already limited to 4k)
* - KVMR sprite and cursor (don't care)
* - horizontal panning (TODO verify this)
* - pipe and plane scaling (TODO verify this)
*/
if (cpp == 8)
return 4096;
else
return 5120;
case I915_FORMAT_MOD_Y_TILED_CCS: case I915_FORMAT_MOD_Y_TILED_CCS:
case I915_FORMAT_MOD_Yf_TILED_CCS: case I915_FORMAT_MOD_Yf_TILED_CCS:
/* FIXME AUX plane? */ /* FIXME AUX plane? */
......
...@@ -245,11 +245,9 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) ...@@ -245,11 +245,9 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
wakeref = intel_runtime_pm_get(rpm); wakeref = intel_runtime_pm_get(rpm);
srcu = intel_gt_reset_trylock(ggtt->vm.gt); ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
if (srcu < 0) { if (ret)
ret = srcu;
goto err_rpm; goto err_rpm;
}
ret = i915_mutex_lock_interruptible(dev); ret = i915_mutex_lock_interruptible(dev);
if (ret) if (ret)
...@@ -318,7 +316,11 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) ...@@ -318,7 +316,11 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
intel_wakeref_auto(&i915->ggtt.userfault_wakeref, intel_wakeref_auto(&i915->ggtt.userfault_wakeref,
msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
i915_vma_set_ggtt_write(vma); if (write) {
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
i915_vma_set_ggtt_write(vma);
obj->mm.dirty = true;
}
err_fence: err_fence:
i915_vma_unpin_fence(vma); i915_vma_unpin_fence(vma);
......
...@@ -241,9 +241,6 @@ void i915_gem_resume(struct drm_i915_private *i915) ...@@ -241,9 +241,6 @@ void i915_gem_resume(struct drm_i915_private *i915)
mutex_lock(&i915->drm.struct_mutex); mutex_lock(&i915->drm.struct_mutex);
intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
i915_gem_restore_gtt_mappings(i915);
i915_gem_restore_fences(i915);
if (i915_gem_init_hw(i915)) if (i915_gem_init_hw(i915))
goto err_wedged; goto err_wedged;
......
...@@ -136,6 +136,20 @@ execlists_active(const struct intel_engine_execlists *execlists) ...@@ -136,6 +136,20 @@ execlists_active(const struct intel_engine_execlists *execlists)
return READ_ONCE(*execlists->active); return READ_ONCE(*execlists->active);
} }
/*
 * Disable bottom halves on the local CPU, then take the tasklet lock of
 * execlists->tasklet. The two steps are undone, in reverse order, by
 * execlists_active_unlock_bh().
 */
static inline void
execlists_active_lock_bh(struct intel_engine_execlists *execlists)
{
local_bh_disable(); /* prevent local softirq and lock recursion */
tasklet_lock(&execlists->tasklet);
}
/*
 * Release the tasklet lock of execlists->tasklet, then re-enable bottom
 * halves on the local CPU. This is the reverse of the sequence performed by
 * execlists_active_lock_bh().
 */
static inline void
execlists_active_unlock_bh(struct intel_engine_execlists *execlists)
{
tasklet_unlock(&execlists->tasklet);
local_bh_enable(); /* restore softirq, and kick ksoftirqd! */
}
struct i915_request * struct i915_request *
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
......
...@@ -1197,9 +1197,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, ...@@ -1197,9 +1197,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
struct drm_printer *m) struct drm_printer *m)
{ {
struct drm_i915_private *dev_priv = engine->i915; struct drm_i915_private *dev_priv = engine->i915;
const struct intel_engine_execlists * const execlists = struct intel_engine_execlists * const execlists = &engine->execlists;
&engine->execlists;
unsigned long flags;
u64 addr; u64 addr;
if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7)) if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
...@@ -1281,7 +1279,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, ...@@ -1281,7 +1279,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
idx, hws[idx * 2], hws[idx * 2 + 1]); idx, hws[idx * 2], hws[idx * 2 + 1]);
} }
spin_lock_irqsave(&engine->active.lock, flags); execlists_active_lock_bh(execlists);
for (port = execlists->active; (rq = *port); port++) { for (port = execlists->active; (rq = *port); port++) {
char hdr[80]; char hdr[80];
int len; int len;
...@@ -1309,7 +1307,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, ...@@ -1309,7 +1307,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
hwsp_seqno(rq)); hwsp_seqno(rq));
print_request(m, rq, hdr); print_request(m, rq, hdr);
} }
spin_unlock_irqrestore(&engine->active.lock, flags); execlists_active_unlock_bh(execlists);
} else if (INTEL_GEN(dev_priv) > 6) { } else if (INTEL_GEN(dev_priv) > 6) {
drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
ENGINE_READ(engine, RING_PP_DIR_BASE)); ENGINE_READ(engine, RING_PP_DIR_BASE));
...@@ -1440,8 +1438,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) ...@@ -1440,8 +1438,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
if (!intel_engine_supports_stats(engine)) if (!intel_engine_supports_stats(engine))
return -ENODEV; return -ENODEV;
spin_lock_irqsave(&engine->active.lock, flags); execlists_active_lock_bh(execlists);
write_seqlock(&engine->stats.lock); write_seqlock_irqsave(&engine->stats.lock, flags);
if (unlikely(engine->stats.enabled == ~0)) { if (unlikely(engine->stats.enabled == ~0)) {
err = -EBUSY; err = -EBUSY;
...@@ -1469,8 +1467,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) ...@@ -1469,8 +1467,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
} }
unlock: unlock:
write_sequnlock(&engine->stats.lock); write_sequnlock_irqrestore(&engine->stats.lock, flags);
spin_unlock_irqrestore(&engine->active.lock, flags); execlists_active_unlock_bh(execlists);
return err; return err;
} }
......
...@@ -631,7 +631,6 @@ execlists_schedule_out(struct i915_request *rq) ...@@ -631,7 +631,6 @@ execlists_schedule_out(struct i915_request *rq)
struct intel_engine_cs *cur, *old; struct intel_engine_cs *cur, *old;
trace_i915_request_out(rq); trace_i915_request_out(rq);
GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
old = READ_ONCE(ce->inflight); old = READ_ONCE(ce->inflight);
do do
...@@ -797,6 +796,17 @@ static bool can_merge_rq(const struct i915_request *prev, ...@@ -797,6 +796,17 @@ static bool can_merge_rq(const struct i915_request *prev,
GEM_BUG_ON(prev == next); GEM_BUG_ON(prev == next);
GEM_BUG_ON(!assert_priority_queue(prev, next)); GEM_BUG_ON(!assert_priority_queue(prev, next));
/*
* We do not submit known completed requests. Therefore if the next
* request is already completed, we can pretend to merge it in
* with the previous context (and we will skip updating the ELSP
* and tracking). Thus hopefully keeping the ELSP full with active
* contexts, despite the best efforts of preempt-to-busy to confuse
* us.
*/
if (i915_request_completed(next))
return true;
if (!can_merge_ctx(prev->hw_context, next->hw_context)) if (!can_merge_ctx(prev->hw_context, next->hw_context))
return false; return false;
...@@ -893,7 +903,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, ...@@ -893,7 +903,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
static struct i915_request * static struct i915_request *
last_active(const struct intel_engine_execlists *execlists) last_active(const struct intel_engine_execlists *execlists)
{ {
struct i915_request * const *last = execlists->active; struct i915_request * const *last = READ_ONCE(execlists->active);
while (*last && i915_request_completed(*last)) while (*last && i915_request_completed(*last))
last++; last++;
...@@ -1172,21 +1182,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) ...@@ -1172,21 +1182,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
continue; continue;
} }
if (i915_request_completed(rq)) {
ve->request = NULL;
ve->base.execlists.queue_priority_hint = INT_MIN;
rb_erase_cached(rb, &execlists->virtual);
RB_CLEAR_NODE(rb);
rq->engine = engine;
__i915_request_submit(rq);
spin_unlock(&ve->base.active.lock);
rb = rb_first_cached(&execlists->virtual);
continue;
}
if (last && !can_merge_rq(last, rq)) { if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.active.lock); spin_unlock(&ve->base.active.lock);
return; /* leave this for another */ return; /* leave this for another */
...@@ -1237,11 +1232,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine) ...@@ -1237,11 +1232,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(ve->siblings[0] != engine); GEM_BUG_ON(ve->siblings[0] != engine);
} }
__i915_request_submit(rq); if (__i915_request_submit(rq)) {
if (!i915_request_completed(rq)) {
submit = true; submit = true;
last = rq; last = rq;
} }
/*
* Hmm, we have a bunch of virtual engine requests,
* but the first one was already completed (thanks
* preempt-to-busy!). Keep looking at the veng queue
* until we have no more relevant requests (i.e.
* the normal submit queue has higher priority).
*/
if (!submit) {
spin_unlock(&ve->base.active.lock);
rb = rb_first_cached(&execlists->virtual);
continue;
}
} }
spin_unlock(&ve->base.active.lock); spin_unlock(&ve->base.active.lock);
...@@ -1254,8 +1261,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) ...@@ -1254,8 +1261,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
int i; int i;
priolist_for_each_request_consume(rq, rn, p, i) { priolist_for_each_request_consume(rq, rn, p, i) {
if (i915_request_completed(rq)) bool merge = true;
goto skip;
/* /*
* Can we combine this request with the current port? * Can we combine this request with the current port?
...@@ -1296,14 +1302,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine) ...@@ -1296,14 +1302,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
ctx_single_port_submission(rq->hw_context)) ctx_single_port_submission(rq->hw_context))
goto done; goto done;
*port = execlists_schedule_in(last, port - execlists->pending); merge = false;
port++;
} }
last = rq; if (__i915_request_submit(rq)) {
submit = true; if (!merge) {
skip: *port = execlists_schedule_in(last, port - execlists->pending);
__i915_request_submit(rq); port++;
last = NULL;
}
GEM_BUG_ON(last &&
!can_merge_ctx(last->hw_context,
rq->hw_context));
submit = true;
last = rq;
}
} }
rb_erase_cached(&p->node, &execlists->queue); rb_erase_cached(&p->node, &execlists->queue);
...@@ -1593,8 +1608,11 @@ static void process_csb(struct intel_engine_cs *engine) ...@@ -1593,8 +1608,11 @@ static void process_csb(struct intel_engine_cs *engine)
static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
{ {
lockdep_assert_held(&engine->active.lock); lockdep_assert_held(&engine->active.lock);
if (!engine->execlists.pending[0]) if (!engine->execlists.pending[0]) {
rcu_read_lock(); /* protect peeking at execlists->active */
execlists_dequeue(engine); execlists_dequeue(engine);
rcu_read_unlock();
}
} }
/* /*
...@@ -2399,10 +2417,14 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) ...@@ -2399,10 +2417,14 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
static struct i915_request *active_request(struct i915_request *rq) static struct i915_request *active_request(struct i915_request *rq)
{ {
const struct list_head * const list = &rq->timeline->requests;
const struct intel_context * const ce = rq->hw_context; const struct intel_context * const ce = rq->hw_context;
struct i915_request *active = NULL; struct i915_request *active = NULL;
struct list_head *list;
if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
return rq;
list = &rq->timeline->requests;
list_for_each_entry_from_reverse(rq, list, link) { list_for_each_entry_from_reverse(rq, list, link) {
if (i915_request_completed(rq)) if (i915_request_completed(rq))
break; break;
...@@ -2565,7 +2587,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) ...@@ -2565,7 +2587,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
int i; int i;
priolist_for_each_request_consume(rq, rn, p, i) { priolist_for_each_request_consume(rq, rn, p, i) {
list_del_init(&rq->sched.link);
__i915_request_submit(rq); __i915_request_submit(rq);
dma_fence_set_error(&rq->fence, -EIO); dma_fence_set_error(&rq->fence, -EIO);
i915_request_mark_complete(rq); i915_request_mark_complete(rq);
...@@ -3631,18 +3652,22 @@ static void ...@@ -3631,18 +3652,22 @@ static void
virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
{ {
struct virtual_engine *ve = to_virtual_engine(rq->engine); struct virtual_engine *ve = to_virtual_engine(rq->engine);
intel_engine_mask_t allowed, exec;
struct ve_bond *bond; struct ve_bond *bond;
allowed = ~to_request(signal)->engine->mask;
bond = virtual_find_bond(ve, to_request(signal)->engine); bond = virtual_find_bond(ve, to_request(signal)->engine);
if (bond) { if (bond)
intel_engine_mask_t old, new, cmp; allowed &= bond->sibling_mask;
cmp = READ_ONCE(rq->execution_mask); /* Restrict the bonded request to run on only the available engines */
do { exec = READ_ONCE(rq->execution_mask);
old = cmp; while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
new = cmp & bond->sibling_mask; ;
} while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
} /* Prevent the master from being re-run on the bonded engines */
to_request(signal)->execution_mask &= ~allowed;
} }
struct intel_context * struct intel_context *
......
...@@ -42,11 +42,10 @@ static void engine_skip_context(struct i915_request *rq) ...@@ -42,11 +42,10 @@ static void engine_skip_context(struct i915_request *rq)
struct intel_engine_cs *engine = rq->engine; struct intel_engine_cs *engine = rq->engine;
struct i915_gem_context *hung_ctx = rq->gem_context; struct i915_gem_context *hung_ctx = rq->gem_context;
lockdep_assert_held(&engine->active.lock);
if (!i915_request_is_active(rq)) if (!i915_request_is_active(rq))
return; return;
lockdep_assert_held(&engine->active.lock);
list_for_each_entry_continue(rq, &engine->active.requests, sched.link) list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
if (rq->gem_context == hung_ctx) if (rq->gem_context == hung_ctx)
i915_request_skip(rq, -EIO); i915_request_skip(rq, -EIO);
...@@ -123,7 +122,6 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) ...@@ -123,7 +122,6 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
rq->fence.seqno, rq->fence.seqno,
yesno(guilty)); yesno(guilty));
lockdep_assert_held(&rq->engine->active.lock);
GEM_BUG_ON(i915_request_completed(rq)); GEM_BUG_ON(i915_request_completed(rq));
if (guilty) { if (guilty) {
...@@ -1214,10 +1212,8 @@ void intel_gt_handle_error(struct intel_gt *gt, ...@@ -1214,10 +1212,8 @@ void intel_gt_handle_error(struct intel_gt *gt,
intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref); intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
} }
int intel_gt_reset_trylock(struct intel_gt *gt) int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
{ {
int srcu;
might_lock(&gt->reset.backoff_srcu); might_lock(&gt->reset.backoff_srcu);
might_sleep(); might_sleep();
...@@ -1232,10 +1228,10 @@ int intel_gt_reset_trylock(struct intel_gt *gt) ...@@ -1232,10 +1228,10 @@ int intel_gt_reset_trylock(struct intel_gt *gt)
rcu_read_lock(); rcu_read_lock();
} }
srcu = srcu_read_lock(&gt->reset.backoff_srcu); *srcu = srcu_read_lock(&gt->reset.backoff_srcu);
rcu_read_unlock(); rcu_read_unlock();
return srcu; return 0;
} }
void intel_gt_reset_unlock(struct intel_gt *gt, int tag) void intel_gt_reset_unlock(struct intel_gt *gt, int tag)
......
...@@ -38,7 +38,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, ...@@ -38,7 +38,7 @@ int intel_engine_reset(struct intel_engine_cs *engine,
void __i915_request_reset(struct i915_request *rq, bool guilty); void __i915_request_reset(struct i915_request *rq, bool guilty);
int __must_check intel_gt_reset_trylock(struct intel_gt *gt); int __must_check intel_gt_reset_trylock(struct intel_gt *gt, int *srcu);
void intel_gt_reset_unlock(struct intel_gt *gt, int tag); void intel_gt_reset_unlock(struct intel_gt *gt, int tag);
void intel_gt_set_wedged(struct intel_gt *gt); void intel_gt_set_wedged(struct intel_gt *gt);
......
...@@ -1573,7 +1573,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) ...@@ -1573,7 +1573,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
struct intel_engine_cs *engine = rq->engine; struct intel_engine_cs *engine = rq->engine;
enum intel_engine_id id; enum intel_engine_id id;
const int num_engines = const int num_engines =
IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0; IS_HASWELL(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
bool force_restore = false; bool force_restore = false;
int len; int len;
u32 *cs; u32 *cs;
......
...@@ -1063,6 +1063,9 @@ static void gen9_whitelist_build(struct i915_wa_list *w) ...@@ -1063,6 +1063,9 @@ static void gen9_whitelist_build(struct i915_wa_list *w)
/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
whitelist_reg(w, GEN8_HDC_CHICKEN1); whitelist_reg(w, GEN8_HDC_CHICKEN1);
/* WaSendPushConstantsFromMMIO:skl,bxt */
whitelist_reg(w, COMMON_SLICE_CHICKEN2);
} }
static void skl_whitelist_build(struct intel_engine_cs *engine) static void skl_whitelist_build(struct intel_engine_cs *engine)
......
...@@ -1924,6 +1924,11 @@ static int i915_drm_resume(struct drm_device *dev) ...@@ -1924,6 +1924,11 @@ static int i915_drm_resume(struct drm_device *dev)
if (ret) if (ret)
DRM_ERROR("failed to re-enable GGTT\n"); DRM_ERROR("failed to re-enable GGTT\n");
mutex_lock(&dev_priv->drm.struct_mutex);
i915_gem_restore_gtt_mappings(dev_priv);
i915_gem_restore_fences(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex);
intel_csr_ucode_resume(dev_priv); intel_csr_ucode_resume(dev_priv);
i915_restore_state(dev_priv); i915_restore_state(dev_priv);
......
...@@ -77,6 +77,12 @@ struct drm_i915_private; ...@@ -77,6 +77,12 @@ struct drm_i915_private;
#define I915_GEM_IDLE_TIMEOUT (HZ / 5) #define I915_GEM_IDLE_TIMEOUT (HZ / 5)
/*
 * Spin until tasklet_trylock(t) succeeds, calling cpu_relax() between
 * attempts. Returns only once the trylock has succeeded.
 */
static inline void tasklet_lock(struct tasklet_struct *t)
{
while (!tasklet_trylock(t))
cpu_relax();
}
static inline void __tasklet_disable_sync_once(struct tasklet_struct *t) static inline void __tasklet_disable_sync_once(struct tasklet_struct *t)
{ {
if (!atomic_fetch_inc(&t->count)) if (!atomic_fetch_inc(&t->count))
......
...@@ -194,6 +194,27 @@ static void free_capture_list(struct i915_request *request) ...@@ -194,6 +194,27 @@ static void free_capture_list(struct i915_request *request)
} }
} }
/*
 * Unlink rq->sched.link while holding the active.lock of the engine that
 * rq->engine designates once that lock is held.
 */
static void remove_from_engine(struct i915_request *rq)
{
struct intel_engine_cs *engine, *locked;
/*
* Virtual engines complicate acquiring the engine timeline lock,
* as their rq->engine pointer is not stable until under that
* engine lock. The simple ploy we use is to take the lock then
* check that the rq still belongs to the newly locked engine.
*/
locked = READ_ONCE(rq->engine);
spin_lock(&locked->active.lock);
/*
 * Re-read rq->engine with the lock held; if it no longer matches the
 * engine we locked, drop that lock, take the new engine's lock and
 * check again.
 */
while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
spin_unlock(&locked->active.lock);
spin_lock(&engine->active.lock);
locked = engine;
}
/* locked now matches the last value read from rq->engine. */
list_del(&rq->sched.link);
spin_unlock(&locked->active.lock);
}
static bool i915_request_retire(struct i915_request *rq) static bool i915_request_retire(struct i915_request *rq)
{ {
struct i915_active_request *active, *next; struct i915_active_request *active, *next;
...@@ -259,9 +280,7 @@ static bool i915_request_retire(struct i915_request *rq) ...@@ -259,9 +280,7 @@ static bool i915_request_retire(struct i915_request *rq)
* request that we have removed from the HW and put back on a run * request that we have removed from the HW and put back on a run
* queue. * queue.
*/ */
spin_lock(&rq->engine->active.lock); remove_from_engine(rq);
list_del(&rq->sched.link);
spin_unlock(&rq->engine->active.lock);
spin_lock(&rq->lock); spin_lock(&rq->lock);
i915_request_mark_complete(rq); i915_request_mark_complete(rq);
...@@ -358,9 +377,10 @@ __i915_request_await_execution(struct i915_request *rq, ...@@ -358,9 +377,10 @@ __i915_request_await_execution(struct i915_request *rq,
return 0; return 0;
} }
void __i915_request_submit(struct i915_request *request) bool __i915_request_submit(struct i915_request *request)
{ {
struct intel_engine_cs *engine = request->engine; struct intel_engine_cs *engine = request->engine;
bool result = false;
GEM_TRACE("%s fence %llx:%lld, current %d\n", GEM_TRACE("%s fence %llx:%lld, current %d\n",
engine->name, engine->name,
...@@ -370,6 +390,25 @@ void __i915_request_submit(struct i915_request *request) ...@@ -370,6 +390,25 @@ void __i915_request_submit(struct i915_request *request)
GEM_BUG_ON(!irqs_disabled()); GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->active.lock); lockdep_assert_held(&engine->active.lock);
/*
* With the advent of preempt-to-busy, we frequently encounter
* requests that we have unsubmitted from HW, but left running
* until the next ack and so have completed in the meantime. On
* resubmission of that completed request, we can skip
* updating the payload, and execlists can even skip submitting
* the request.
*
* We must remove the request from the caller's priority queue,
* and the caller must only call us when the request is in their
* priority queue, under the active.lock. This ensures that the
* request has *not* yet been retired and we can safely move
* the request into the engine->active.list where it will be
* dropped upon retiring. (Otherwise if resubmit a *retired*
* request, this would be a horrible use-after-free.)
*/
if (i915_request_completed(request))
goto xfer;
if (i915_gem_context_is_banned(request->gem_context)) if (i915_gem_context_is_banned(request->gem_context))
i915_request_skip(request, -EIO); i915_request_skip(request, -EIO);
...@@ -393,13 +432,18 @@ void __i915_request_submit(struct i915_request *request) ...@@ -393,13 +432,18 @@ void __i915_request_submit(struct i915_request *request)
i915_sw_fence_signaled(&request->semaphore)) i915_sw_fence_signaled(&request->semaphore))
engine->saturated |= request->sched.semaphores; engine->saturated |= request->sched.semaphores;
/* We may be recursing from the signal callback of another i915 fence */ engine->emit_fini_breadcrumb(request,
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); request->ring->vaddr + request->postfix);
list_move_tail(&request->sched.link, &engine->active.requests); trace_i915_request_execute(request);
engine->serial++;
result = true;
xfer: /* We may be recursing from the signal callback of another i915 fence */
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags))
set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); list_move_tail(&request->sched.link, &engine->active.requests);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
...@@ -410,12 +454,7 @@ void __i915_request_submit(struct i915_request *request) ...@@ -410,12 +454,7 @@ void __i915_request_submit(struct i915_request *request)
spin_unlock(&request->lock); spin_unlock(&request->lock);
engine->emit_fini_breadcrumb(request, return result;
request->ring->vaddr + request->postfix);
engine->serial++;
trace_i915_request_execute(request);
} }
void i915_request_submit(struct i915_request *request) void i915_request_submit(struct i915_request *request)
......
...@@ -292,7 +292,7 @@ int i915_request_await_execution(struct i915_request *rq, ...@@ -292,7 +292,7 @@ int i915_request_await_execution(struct i915_request *rq,
void i915_request_add(struct i915_request *rq); void i915_request_add(struct i915_request *rq);
void __i915_request_submit(struct i915_request *request); bool __i915_request_submit(struct i915_request *request);
void i915_request_submit(struct i915_request *request); void i915_request_submit(struct i915_request *request);
void i915_request_skip(struct i915_request *request, int error); void i915_request_skip(struct i915_request *request, int error);
......
...@@ -69,6 +69,7 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) ...@@ -69,6 +69,7 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv));
return PCH_CNP; return PCH_CNP;
case INTEL_PCH_CMP_DEVICE_ID_TYPE: case INTEL_PCH_CMP_DEVICE_ID_TYPE:
case INTEL_PCH_CMP2_DEVICE_ID_TYPE:
DRM_DEBUG_KMS("Found Comet Lake PCH (CMP)\n"); DRM_DEBUG_KMS("Found Comet Lake PCH (CMP)\n");
WARN_ON(!IS_COFFEELAKE(dev_priv)); WARN_ON(!IS_COFFEELAKE(dev_priv));
/* CometPoint is CNP Compatible */ /* CometPoint is CNP Compatible */
......
...@@ -41,6 +41,7 @@ enum intel_pch { ...@@ -41,6 +41,7 @@ enum intel_pch {
#define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300 #define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300
#define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80 #define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80
#define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280 #define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280
#define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680
#define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480 #define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480
#define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00 #define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00
#define INTEL_PCH_MCC2_DEVICE_ID_TYPE 0x3880 #define INTEL_PCH_MCC2_DEVICE_ID_TYPE 0x3880
......
...@@ -118,6 +118,12 @@ static void pm_resume(struct drm_i915_private *i915) ...@@ -118,6 +118,12 @@ static void pm_resume(struct drm_i915_private *i915)
with_intel_runtime_pm(&i915->runtime_pm, wakeref) { with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
intel_gt_sanitize(&i915->gt, false); intel_gt_sanitize(&i915->gt, false);
i915_gem_sanitize(i915); i915_gem_sanitize(i915);
mutex_lock(&i915->drm.struct_mutex);
i915_gem_restore_gtt_mappings(i915);
i915_gem_restore_fences(i915);
mutex_unlock(&i915->drm.struct_mutex);
i915_gem_resume(i915); i915_gem_resume(i915);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment