• Chris Wilson's avatar
    drm/i915: Disable per-engine reset for Broxton · 2b49e721
    Chris Wilson authored
    Triggering a GPU reset for one engine affects another, notably
    corrupting the context status buffer (CSB) effectively losing track of
    inflight requests.
    
    Adding a few printks:
    diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
    index ad41836fa5e5..a969456bc0fa 100644
    --- a/drivers/gpu/drm/i915/i915_drv.c
    +++ b/drivers/gpu/drm/i915/i915_drv.c
    @@ -1953,6 +1953,7 @@ int i915_reset_engine(struct intel_engine_cs *engine)
                    goto out;
            }
    
    +       pr_err("Resetting %s\n", engine->name);
            ret = intel_gpu_reset(engine->i915, intel_engine_flag(engine));
            if (ret) {
                    /* If we fail here, we expect to fallback to a global reset */
    diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
    index 716e5c9ea222..a72bc35d0870 100644
    --- a/drivers/gpu/drm/i915/intel_lrc.c
    +++ b/drivers/gpu/drm/i915/intel_lrc.c
    @@ -355,6 +355,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
                                    execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
                            port_set(&port[n], port_pack(rq, count));
                            desc = execlists_update_context(rq);
    +                       pr_err("%s: in (rq=%x) ctx=%d\n", engine->name, rq->global_seqno, upper_32_bits(desc));
                            GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
                    } else {
                            GEM_BUG_ON(!n);
    @@ -594,9 +595,23 @@ static void intel_lrc_irq_handler(unsigned long data)
                            if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
                                    continue;
    
    +                       pr_err("%s: out CSB (%x head=%d, tail=%d), ctx=%d, rq=%d\n",
    +                                       engine->name,
    +                                       readl(csb_mmio),
    +                                       head, tail,
    +                                       readl(buf+2*head+1),
    +                                       port->context_id);
    +
                            /* Check the context/desc id for this event matches */
    -                       GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
    -                                        port->context_id);
    +                       if (readl(buf + 2 * head + 1) != port->context_id) {
    +                               pr_err("%s: BUG CSB (%x head=%d, tail=%d), ctx=%d, rq=%d\n",
    +                                               engine->name,
    +                                               readl(csb_mmio),
    +                                               head, tail,
    +                                               readl(buf+2*head+1),
    +                                               port->context_id);
    +                               BUG();
    +                       }
    
                            rq = port_unpack(port, &count);
                            GEM_BUG_ON(count == 0);
    
    Results in:
    
    [ 6423.006602] Resetting rcs0
    [ 6423.009080] rcs0: in (rq=fffffe70) ctx=1
    [ 6423.009216] rcs0: in (rq=fffffe6f) ctx=3
    [ 6423.009542] rcs0: out CSB (2 head=1, tail=2), ctx=3, rq=3
    [ 6423.009619] Resetting bcs0
    [ 6423.009980] rcs0: BUG CSB (0 head=1, tail=2), ctx=0, rq=3
    
    Note that this bug may affect all machines and not just Broxton;
    Broxton is just the first machine on which I have confirmed this bug.
    
    Fixes: 142bc7d9 ("drm/i915: Modify error handler for per engine hang recovery")
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
    Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
    Cc: Michel Thierry <michel.thierry@intel.com>
    Acked-by: Michel Thierry <michel.thierry@intel.com>
    Link: https://patchwork.freedesktop.org/patch/msgid/20170721123238.16428-13-chris@chris-wilson.co.uk
    Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
    2b49e721
i915_pci.c 15.8 KB