Commit 4adbcff2 authored by Dave Airlie

Merge tag 'drm-intel-fixes-2019-10-10' of...

Merge tag 'drm-intel-fixes-2019-10-10' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixes

- Fix CML display by adding a missing ID.
- Drop redundant list_del_init
- Only enqueue already completed requests to avoid races
- Fixup preempt-to-busy vs reset of a virtual request
- Protect peeking at execlists->active
- execlists->active is serialised by the tasklet

drm-intel-next-fixes-2019-09-19:
- Extend old HSW workaround to fix some GPU hangs on Haswell GT2
- Fix return error code on GEM mmap.
- White list a chicken bit register for push constants legacy mode on Mesa
- Fix resume issue related to GGTT restore
- Remove incorrect BUG_ON on execlist's schedule-out
- Fix unrecoverable GPU hangs with Vulkan compute workloads on SKL

drm-intel-next-fixes-2019-09-26:
- Fix concurrency in cases where requests were getting retired at the same time as being resubmitted to HW
- Fix gen9 display resolutions by setting the right max plane width
- Fix GPU hang on preemption
- Mark contents as dirty on a write fault. This was breaking cursor sprite with dumb buffers.
Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191010143039.GA15313@intel.com
parents 46fe219d e137d3ab
......@@ -3280,7 +3280,20 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb,
switch (fb->modifier) {
case DRM_FORMAT_MOD_LINEAR:
case I915_FORMAT_MOD_X_TILED:
return 4096;
/*
* Validated limit is 4k, but 5k should
* work apart from the following features:
* - Ytile (already limited to 4k)
* - FP16 (already limited to 4k)
* - render compression (already limited to 4k)
* - KVMR sprite and cursor (don't care)
* - horizontal panning (TODO verify this)
* - pipe and plane scaling (TODO verify this)
*/
if (cpp == 8)
return 4096;
else
return 5120;
case I915_FORMAT_MOD_Y_TILED_CCS:
case I915_FORMAT_MOD_Yf_TILED_CCS:
/* FIXME AUX plane? */
......
......@@ -245,11 +245,9 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
wakeref = intel_runtime_pm_get(rpm);
srcu = intel_gt_reset_trylock(ggtt->vm.gt);
if (srcu < 0) {
ret = srcu;
ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
if (ret)
goto err_rpm;
}
ret = i915_mutex_lock_interruptible(dev);
if (ret)
......@@ -318,7 +316,11 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
intel_wakeref_auto(&i915->ggtt.userfault_wakeref,
msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
i915_vma_set_ggtt_write(vma);
if (write) {
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
i915_vma_set_ggtt_write(vma);
obj->mm.dirty = true;
}
err_fence:
i915_vma_unpin_fence(vma);
......
......@@ -241,9 +241,6 @@ void i915_gem_resume(struct drm_i915_private *i915)
mutex_lock(&i915->drm.struct_mutex);
intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
i915_gem_restore_gtt_mappings(i915);
i915_gem_restore_fences(i915);
if (i915_gem_init_hw(i915))
goto err_wedged;
......
......@@ -136,6 +136,20 @@ execlists_active(const struct intel_engine_execlists *execlists)
return READ_ONCE(*execlists->active);
}
/*
 * Lock out the execlists tasklet so that execlists->active can be
 * inspected: bottom halves are disabled on the local CPU first, and
 * only then is the tasklet itself locked via tasklet_lock(). Undo with
 * execlists_active_unlock_bh().
 */
static inline void
execlists_active_lock_bh(struct intel_engine_execlists *execlists)
{
local_bh_disable(); /* prevent local softirq and lock recursion */
tasklet_lock(&execlists->tasklet);
}
/*
 * Counterpart of execlists_active_lock_bh(): releases the tasklet lock
 * and then re-enables bottom halves, i.e. the exact reverse of the
 * acquisition order.
 */
static inline void
execlists_active_unlock_bh(struct intel_engine_execlists *execlists)
{
tasklet_unlock(&execlists->tasklet);
local_bh_enable(); /* restore softirq, and kick ksoftirqd! */
}
struct i915_request *
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
......
......@@ -1197,9 +1197,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
struct drm_printer *m)
{
struct drm_i915_private *dev_priv = engine->i915;
const struct intel_engine_execlists * const execlists =
&engine->execlists;
unsigned long flags;
struct intel_engine_execlists * const execlists = &engine->execlists;
u64 addr;
if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
......@@ -1281,7 +1279,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
idx, hws[idx * 2], hws[idx * 2 + 1]);
}
spin_lock_irqsave(&engine->active.lock, flags);
execlists_active_lock_bh(execlists);
for (port = execlists->active; (rq = *port); port++) {
char hdr[80];
int len;
......@@ -1309,7 +1307,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
hwsp_seqno(rq));
print_request(m, rq, hdr);
}
spin_unlock_irqrestore(&engine->active.lock, flags);
execlists_active_unlock_bh(execlists);
} else if (INTEL_GEN(dev_priv) > 6) {
drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
ENGINE_READ(engine, RING_PP_DIR_BASE));
......@@ -1440,8 +1438,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
if (!intel_engine_supports_stats(engine))
return -ENODEV;
spin_lock_irqsave(&engine->active.lock, flags);
write_seqlock(&engine->stats.lock);
execlists_active_lock_bh(execlists);
write_seqlock_irqsave(&engine->stats.lock, flags);
if (unlikely(engine->stats.enabled == ~0)) {
err = -EBUSY;
......@@ -1469,8 +1467,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
}
unlock:
write_sequnlock(&engine->stats.lock);
spin_unlock_irqrestore(&engine->active.lock, flags);
write_sequnlock_irqrestore(&engine->stats.lock, flags);
execlists_active_unlock_bh(execlists);
return err;
}
......
......@@ -631,7 +631,6 @@ execlists_schedule_out(struct i915_request *rq)
struct intel_engine_cs *cur, *old;
trace_i915_request_out(rq);
GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
old = READ_ONCE(ce->inflight);
do
......@@ -797,6 +796,17 @@ static bool can_merge_rq(const struct i915_request *prev,
GEM_BUG_ON(prev == next);
GEM_BUG_ON(!assert_priority_queue(prev, next));
/*
* We do not submit known completed requests. Therefore if the next
* request is already completed, we can pretend to merge it in
* with the previous context (and we will skip updating the ELSP
* and tracking). Thus hopefully keeping the ELSP full with active
* contexts, despite the best efforts of preempt-to-busy to confuse
* us.
*/
if (i915_request_completed(next))
return true;
if (!can_merge_ctx(prev->hw_context, next->hw_context))
return false;
......@@ -893,7 +903,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
static struct i915_request *
last_active(const struct intel_engine_execlists *execlists)
{
struct i915_request * const *last = execlists->active;
struct i915_request * const *last = READ_ONCE(execlists->active);
while (*last && i915_request_completed(*last))
last++;
......@@ -1172,21 +1182,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
continue;
}
if (i915_request_completed(rq)) {
ve->request = NULL;
ve->base.execlists.queue_priority_hint = INT_MIN;
rb_erase_cached(rb, &execlists->virtual);
RB_CLEAR_NODE(rb);
rq->engine = engine;
__i915_request_submit(rq);
spin_unlock(&ve->base.active.lock);
rb = rb_first_cached(&execlists->virtual);
continue;
}
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.active.lock);
return; /* leave this for another */
......@@ -1237,11 +1232,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(ve->siblings[0] != engine);
}
__i915_request_submit(rq);
if (!i915_request_completed(rq)) {
if (__i915_request_submit(rq)) {
submit = true;
last = rq;
}
/*
* Hmm, we have a bunch of virtual engine requests,
* but the first one was already completed (thanks
* preempt-to-busy!). Keep looking at the veng queue
* until we have no more relevant requests (i.e.
* the normal submit queue has higher priority).
*/
if (!submit) {
spin_unlock(&ve->base.active.lock);
rb = rb_first_cached(&execlists->virtual);
continue;
}
}
spin_unlock(&ve->base.active.lock);
......@@ -1254,8 +1261,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
int i;
priolist_for_each_request_consume(rq, rn, p, i) {
if (i915_request_completed(rq))
goto skip;
bool merge = true;
/*
* Can we combine this request with the current port?
......@@ -1296,14 +1302,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
ctx_single_port_submission(rq->hw_context))
goto done;
*port = execlists_schedule_in(last, port - execlists->pending);
port++;
merge = false;
}
last = rq;
submit = true;
skip:
__i915_request_submit(rq);
if (__i915_request_submit(rq)) {
if (!merge) {
*port = execlists_schedule_in(last, port - execlists->pending);
port++;
last = NULL;
}
GEM_BUG_ON(last &&
!can_merge_ctx(last->hw_context,
rq->hw_context));
submit = true;
last = rq;
}
}
rb_erase_cached(&p->node, &execlists->queue);
......@@ -1593,8 +1608,11 @@ static void process_csb(struct intel_engine_cs *engine)
static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
{
lockdep_assert_held(&engine->active.lock);
if (!engine->execlists.pending[0])
if (!engine->execlists.pending[0]) {
rcu_read_lock(); /* protect peeking at execlists->active */
execlists_dequeue(engine);
rcu_read_unlock();
}
}
/*
......@@ -2399,10 +2417,14 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
static struct i915_request *active_request(struct i915_request *rq)
{
const struct list_head * const list = &rq->timeline->requests;
const struct intel_context * const ce = rq->hw_context;
struct i915_request *active = NULL;
struct list_head *list;
if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
return rq;
list = &rq->timeline->requests;
list_for_each_entry_from_reverse(rq, list, link) {
if (i915_request_completed(rq))
break;
......@@ -2565,7 +2587,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
int i;
priolist_for_each_request_consume(rq, rn, p, i) {
list_del_init(&rq->sched.link);
__i915_request_submit(rq);
dma_fence_set_error(&rq->fence, -EIO);
i915_request_mark_complete(rq);
......@@ -3631,18 +3652,22 @@ static void
virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
{
struct virtual_engine *ve = to_virtual_engine(rq->engine);
intel_engine_mask_t allowed, exec;
struct ve_bond *bond;
allowed = ~to_request(signal)->engine->mask;
bond = virtual_find_bond(ve, to_request(signal)->engine);
if (bond) {
intel_engine_mask_t old, new, cmp;
if (bond)
allowed &= bond->sibling_mask;
cmp = READ_ONCE(rq->execution_mask);
do {
old = cmp;
new = cmp & bond->sibling_mask;
} while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
}
/* Restrict the bonded request to run on only the available engines */
exec = READ_ONCE(rq->execution_mask);
while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
;
/* Prevent the master from being re-run on the bonded engines */
to_request(signal)->execution_mask &= ~allowed;
}
struct intel_context *
......
......@@ -42,11 +42,10 @@ static void engine_skip_context(struct i915_request *rq)
struct intel_engine_cs *engine = rq->engine;
struct i915_gem_context *hung_ctx = rq->gem_context;
lockdep_assert_held(&engine->active.lock);
if (!i915_request_is_active(rq))
return;
lockdep_assert_held(&engine->active.lock);
list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
if (rq->gem_context == hung_ctx)
i915_request_skip(rq, -EIO);
......@@ -123,7 +122,6 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
rq->fence.seqno,
yesno(guilty));
lockdep_assert_held(&rq->engine->active.lock);
GEM_BUG_ON(i915_request_completed(rq));
if (guilty) {
......@@ -1214,10 +1212,8 @@ void intel_gt_handle_error(struct intel_gt *gt,
intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
}
int intel_gt_reset_trylock(struct intel_gt *gt)
int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
{
int srcu;
might_lock(&gt->reset.backoff_srcu);
might_sleep();
......@@ -1232,10 +1228,10 @@ int intel_gt_reset_trylock(struct intel_gt *gt)
rcu_read_lock();
}
srcu = srcu_read_lock(&gt->reset.backoff_srcu);
*srcu = srcu_read_lock(&gt->reset.backoff_srcu);
rcu_read_unlock();
return srcu;
return 0;
}
void intel_gt_reset_unlock(struct intel_gt *gt, int tag)
......
......@@ -38,7 +38,7 @@ int intel_engine_reset(struct intel_engine_cs *engine,
void __i915_request_reset(struct i915_request *rq, bool guilty);
int __must_check intel_gt_reset_trylock(struct intel_gt *gt);
int __must_check intel_gt_reset_trylock(struct intel_gt *gt, int *srcu);
void intel_gt_reset_unlock(struct intel_gt *gt, int tag);
void intel_gt_set_wedged(struct intel_gt *gt);
......
......@@ -1573,7 +1573,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
struct intel_engine_cs *engine = rq->engine;
enum intel_engine_id id;
const int num_engines =
IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
IS_HASWELL(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
bool force_restore = false;
int len;
u32 *cs;
......
......@@ -1063,6 +1063,9 @@ static void gen9_whitelist_build(struct i915_wa_list *w)
/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
whitelist_reg(w, GEN8_HDC_CHICKEN1);
/* WaSendPushConstantsFromMMIO:skl,bxt */
whitelist_reg(w, COMMON_SLICE_CHICKEN2);
}
static void skl_whitelist_build(struct intel_engine_cs *engine)
......
......@@ -1924,6 +1924,11 @@ static int i915_drm_resume(struct drm_device *dev)
if (ret)
DRM_ERROR("failed to re-enable GGTT\n");
mutex_lock(&dev_priv->drm.struct_mutex);
i915_gem_restore_gtt_mappings(dev_priv);
i915_gem_restore_fences(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex);
intel_csr_ucode_resume(dev_priv);
i915_restore_state(dev_priv);
......
......@@ -77,6 +77,12 @@ struct drm_i915_private;
#define I915_GEM_IDLE_TIMEOUT (HZ / 5)
/*
 * Spin until tasklet_trylock() succeeds for @t, calling cpu_relax()
 * after every failed attempt. Returns only once the trylock has
 * succeeded; there is no timeout.
 */
static inline void tasklet_lock(struct tasklet_struct *t)
{
	for (;;) {
		if (tasklet_trylock(t))
			break;

		cpu_relax();
	}
}
static inline void __tasklet_disable_sync_once(struct tasklet_struct *t)
{
if (!atomic_fetch_inc(&t->count))
......
......@@ -194,6 +194,27 @@ static void free_capture_list(struct i915_request *request)
}
}
/*
 * Unlink @rq from whichever list its sched.link is on, holding the
 * active.lock of the engine that rq->engine points at while doing so.
 */
static void remove_from_engine(struct i915_request *rq)
{
struct intel_engine_cs *engine, *locked;
/*
* Virtual engines complicate acquiring the engine timeline lock,
* as their rq->engine pointer is not stable until under that
* engine lock. The simple ploy we use is to take the lock then
* check that the rq still belongs to the newly locked engine.
*/
locked = READ_ONCE(rq->engine);
spin_lock(&locked->active.lock);
/* Re-read rq->engine under the lock; if it moved, chase it and retry. */
while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
spin_unlock(&locked->active.lock);
spin_lock(&engine->active.lock);
locked = engine;
}
/* Here locked matches rq->engine as observed while holding its lock. */
list_del(&rq->sched.link);
spin_unlock(&locked->active.lock);
}
static bool i915_request_retire(struct i915_request *rq)
{
struct i915_active_request *active, *next;
......@@ -259,9 +280,7 @@ static bool i915_request_retire(struct i915_request *rq)
* request that we have removed from the HW and put back on a run
* queue.
*/
spin_lock(&rq->engine->active.lock);
list_del(&rq->sched.link);
spin_unlock(&rq->engine->active.lock);
remove_from_engine(rq);
spin_lock(&rq->lock);
i915_request_mark_complete(rq);
......@@ -358,9 +377,10 @@ __i915_request_await_execution(struct i915_request *rq,
return 0;
}
void __i915_request_submit(struct i915_request *request)
bool __i915_request_submit(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;
bool result = false;
GEM_TRACE("%s fence %llx:%lld, current %d\n",
engine->name,
......@@ -370,6 +390,25 @@ void __i915_request_submit(struct i915_request *request)
GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->active.lock);
/*
* With the advent of preempt-to-busy, we frequently encounter
* requests that we have unsubmitted from HW, but left running
* until the next ack and so have completed in the meantime. On
* resubmission of that completed request, we can skip
* updating the payload, and execlists can even skip submitting
* the request.
*
* We must remove the request from the caller's priority queue,
* and the caller must only call us when the request is in their
* priority queue, under the active.lock. This ensures that the
* request has *not* yet been retired and we can safely move
* the request into the engine->active.list where it will be
* dropped upon retiring. (Otherwise, if we resubmit a *retired*
* request, this would be a horrible use-after-free.)
*/
if (i915_request_completed(request))
goto xfer;
if (i915_gem_context_is_banned(request->gem_context))
i915_request_skip(request, -EIO);
......@@ -393,13 +432,18 @@ void __i915_request_submit(struct i915_request *request)
i915_sw_fence_signaled(&request->semaphore))
engine->saturated |= request->sched.semaphores;
/* We may be recursing from the signal callback of another i915 fence */
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
engine->emit_fini_breadcrumb(request,
request->ring->vaddr + request->postfix);
list_move_tail(&request->sched.link, &engine->active.requests);
trace_i915_request_execute(request);
engine->serial++;
result = true;
xfer: /* We may be recursing from the signal callback of another i915 fence */
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags))
list_move_tail(&request->sched.link, &engine->active.requests);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
......@@ -410,12 +454,7 @@ void __i915_request_submit(struct i915_request *request)
spin_unlock(&request->lock);
engine->emit_fini_breadcrumb(request,
request->ring->vaddr + request->postfix);
engine->serial++;
trace_i915_request_execute(request);
return result;
}
void i915_request_submit(struct i915_request *request)
......
......@@ -292,7 +292,7 @@ int i915_request_await_execution(struct i915_request *rq,
void i915_request_add(struct i915_request *rq);
void __i915_request_submit(struct i915_request *request);
bool __i915_request_submit(struct i915_request *request);
void i915_request_submit(struct i915_request *request);
void i915_request_skip(struct i915_request *request, int error);
......
......@@ -69,6 +69,7 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv));
return PCH_CNP;
case INTEL_PCH_CMP_DEVICE_ID_TYPE:
case INTEL_PCH_CMP2_DEVICE_ID_TYPE:
DRM_DEBUG_KMS("Found Comet Lake PCH (CMP)\n");
WARN_ON(!IS_COFFEELAKE(dev_priv));
/* CometPoint is CNP Compatible */
......
......@@ -41,6 +41,7 @@ enum intel_pch {
#define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300
#define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80
#define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280
#define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680
#define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480
#define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00
#define INTEL_PCH_MCC2_DEVICE_ID_TYPE 0x3880
......
......@@ -118,6 +118,12 @@ static void pm_resume(struct drm_i915_private *i915)
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
intel_gt_sanitize(&i915->gt, false);
i915_gem_sanitize(i915);
mutex_lock(&i915->drm.struct_mutex);
i915_gem_restore_gtt_mappings(i915);
i915_gem_restore_fences(i915);
mutex_unlock(&i915->drm.struct_mutex);
i915_gem_resume(i915);
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment