Commit 6689c167 authored by McAulay, Alistair, committed by Daniel Vetter

drm/i915: Rework GPU reset sequence to match driver load & thaw

This patch addresses Daniel's concerns over using different code during reset:

http://lists.freedesktop.org/archives/intel-gfx/2014-June/047758.html

"The reason for aiming as hard as possible to use the exact same code for
driver load, gpu reset and runtime pm/system resume is that we've simply
seen too many bugs due to slight variations and unintended omissions."

Tested using igt drv_hangman.

V2: Cleaner way of preventing check_wedge from returning -EAGAIN
V3: Clear the last_context during reset, to ensure do_switch() emits the MI_SET_CONTEXT. As per review.
Signed-off-by: McAulay, Alistair <alistair.mcaulay@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
[danvet: Rebase over ctx->ppgtt rework and extend the comment in
check_wedge a bit.]
Signed-off-by: default avatarDaniel Vetter <daniel.vetter@ffwll.ch>
parent 47c12968
@@ -844,7 +844,13 @@ int i915_reset(struct drm_device *dev)
                         !dev_priv->ums.mm_suspended) {
                 dev_priv->ums.mm_suspended = 0;
 
+                /* Used to prevent gem_check_wedged returning -EAGAIN during gpu reset */
+                dev_priv->gpu_error.reload_in_reset = true;
+
                 ret = i915_gem_init_hw(dev);
+
+                dev_priv->gpu_error.reload_in_reset = false;
+
                 mutex_unlock(&dev->struct_mutex);
                 if (ret) {
                         DRM_ERROR("Failed hw init on reset %d\n", ret);
...
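
The hunk above brackets the hardware reload with the new flag. Below is a minimal standalone sketch of that bracketing pattern, using hypothetical *_model names rather than the driver's real types (struct gpu_error_model and init_hw_model() stand in for struct i915_gpu_error and i915_gem_init_hw(); this is a reading aid, not driver code):

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the relevant part of struct i915_gpu_error. */
struct gpu_error_model {
        bool reload_in_reset;
};

/* Placeholder for i915_gem_init_hw(): in the driver this re-initializes
 * rings, contexts and PPGTT state and may therefore submit ring commands. */
static int init_hw_model(void)
{
        return 0;
}

/* Models the i915_reset() change: the flag is raised only for the duration
 * of the hardware reload and is cleared again whether or not it succeeds. */
static int reset_reload_model(struct gpu_error_model *error)
{
        int ret;

        error->reload_in_reset = true;
        ret = init_hw_model();
        error->reload_in_reset = false;

        return ret;
}

int main(void)
{
        struct gpu_error_model error = { .reload_in_reset = false };

        printf("reload returned %d, flag left at %d\n",
               reset_reload_model(&error), error.reload_in_reset);
        return 0;
}

Clearing the flag unconditionally matters: a failed reload must not leave later waiters believing a reset reload is still running.
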
@@ -1239,6 +1239,9 @@ struct i915_gpu_error {
         /* For missed irq/seqno simulation. */
         unsigned int test_irq_rings;
+
+        /* Used to prevent gem_check_wedged returning -EAGAIN during gpu reset */
+        bool reload_in_reset;
 };
 
 enum modeset_restore {
...
@@ -1085,7 +1085,13 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
                 if (i915_terminally_wedged(error))
                         return -EIO;
 
-                return -EAGAIN;
+                /*
+                 * Check if GPU Reset is in progress - we need intel_ring_begin
+                 * to work properly to reinit the hw state while the gpu is
+                 * still marked as reset-in-progress. Handle this with a flag.
+                 */
+                if (!error->reload_in_reset)
+                        return -EAGAIN;
         }
 
         return 0;
...
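
The resulting decision order in i915_gem_check_wedge() can be summarized with a small self-contained model (hypothetical *_model names; the real function also takes an interruptible flag and reads the reset counter): terminal wedging still yields -EIO, a pending reset still yields -EAGAIN to ordinary callers, but the reset path that set reload_in_reset is allowed through so intel_ring_begin() can be used while the hardware is reinitialized.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical model of the error state consulted by check_wedge(). */
struct error_model {
        bool reset_in_progress;   /* a hang was detected, reset pending or running */
        bool terminally_wedged;   /* reset failed, the GPU is gone for good */
        bool reload_in_reset;     /* we are inside i915_reset() reloading the hw */
};

/* Mirrors the decision order of the patched i915_gem_check_wedge(). */
static int check_wedge_model(const struct error_model *e)
{
        if (e->reset_in_progress || e->terminally_wedged) {
                if (e->terminally_wedged)
                        return -EIO;

                /* Reset in progress: back off, unless the reset path itself
                 * is the caller and needs the rings to reinit the hw. */
                if (!e->reload_in_reset)
                        return -EAGAIN;
        }
        return 0;
}

int main(void)
{
        struct error_model racing = { .reset_in_progress = true };
        struct error_model reload = { .reset_in_progress = true,
                                      .reload_in_reset = true };

        printf("racing caller: %d\n", check_wedge_model(&racing)); /* -EAGAIN */
        printf("reset reload:  %d\n", check_wedge_model(&reload)); /* 0 */
        return 0;
}
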
@@ -289,34 +289,17 @@ void i915_gem_context_reset(struct drm_device *dev)
         struct drm_i915_private *dev_priv = dev->dev_private;
         int i;
 
-        /* Prevent the hardware from restoring the last context (which hung) on
-         * the next switch */
         for (i = 0; i < I915_NUM_RINGS; i++) {
                 struct intel_engine_cs *ring = &dev_priv->ring[i];
-                struct intel_context *dctx = ring->default_context;
                 struct intel_context *lctx = ring->last_context;
 
-                /* Do a fake switch to the default context */
-                if (lctx == dctx)
-                        continue;
-
-                if (!lctx)
-                        continue;
-
-                if (dctx->legacy_hw_ctx.rcs_state && i == RCS) {
-                        WARN_ON(i915_gem_obj_ggtt_pin(dctx->legacy_hw_ctx.rcs_state,
-                                                      get_context_alignment(dev), 0));
-                        /* Fake a finish/inactive */
-                        dctx->legacy_hw_ctx.rcs_state->base.write_domain = 0;
-                        dctx->legacy_hw_ctx.rcs_state->active = 0;
-                }
-
-                if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
-                        i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state);
+                if (lctx) {
+                        if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
+                                i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state);
 
-                i915_gem_context_unreference(lctx);
-                i915_gem_context_reference(dctx);
-                ring->last_context = dctx;
+                        i915_gem_context_unreference(lctx);
+                        ring->last_context = NULL;
+                }
         }
 }
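
The V3 point, clearing ring->last_context instead of faking a switch to the default context, matters because do_switch() skips the hardware context load when it believes the target context is already resident. A small sketch of that interaction, with hypothetical *_model types and without the reference counting and pinning the real code does:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical miniature of a ring with a cached "last context" pointer. */
struct ctx_model { const char *name; };
struct ring_model { struct ctx_model *last_context; };

/* Models the relevant property of do_switch(): if the ring believes the
 * target context is already loaded, no MI_SET_CONTEXT is emitted. */
static bool switch_emits_set_context(struct ring_model *ring,
                                     struct ctx_model *to)
{
        if (ring->last_context == to)
                return false;           /* nothing to do, hw state assumed valid */

        /* ... emit MI_SET_CONTEXT and the mm switch here ... */
        ring->last_context = to;
        return true;
}

/* Models i915_gem_context_reset(): after a GPU reset the saved hw state is
 * stale, so drop the cached pointer to force a real switch next time. */
static void context_reset_model(struct ring_model *ring)
{
        ring->last_context = NULL;
}

int main(void)
{
        struct ctx_model def = { "default" };
        struct ring_model rcs = { .last_context = &def };

        printf("before reset: %d\n", switch_emits_set_context(&rcs, &def)); /* 0 */
        context_reset_model(&rcs);
        printf("after reset:  %d\n", switch_emits_set_context(&rcs, &def)); /* 1 */
        return 0;
}

With the pointer cleared, the first switch after reset emits MI_SET_CONTEXT even if it targets the same context that was loaded before the hang.
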
@@ -412,10 +395,6 @@ int i915_gem_context_enable(struct drm_i915_private *dev_priv)
         struct intel_engine_cs *ring;
         int ret, i;
 
-        /* FIXME: We should make this work, even in reset */
-        if (i915_reset_in_progress(&dev_priv->gpu_error))
-                return 0;
-
         BUG_ON(!dev_priv->ring[RCS].default_context);
 
         for_each_ring(ring, dev_priv, i) {
@@ -558,7 +537,7 @@ static int do_switch(struct intel_engine_cs *ring,
         from = ring->last_context;
 
         if (to->ppgtt) {
-                ret = to->ppgtt->switch_mm(to->ppgtt, ring, false);
+                ret = to->ppgtt->switch_mm(to->ppgtt, ring);
                 if (ret)
                         goto unpin_out;
         }
...
@@ -204,19 +204,12 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
 
 /* Broadwell Page Directory Pointer Descriptors */
 static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
-                          uint64_t val, bool synchronous)
+                          uint64_t val)
 {
-        struct drm_i915_private *dev_priv = ring->dev->dev_private;
         int ret;
 
         BUG_ON(entry >= 4);
 
-        if (synchronous) {
-                I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32);
-                I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val);
-                return 0;
-        }
-
         ret = intel_ring_begin(ring, 6);
         if (ret)
                 return ret;
@@ -233,8 +226,7 @@ static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
 }
 
 static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
-                          struct intel_engine_cs *ring,
-                          bool synchronous)
+                          struct intel_engine_cs *ring)
 {
         int i, ret;
 
@@ -243,7 +235,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
         for (i = used_pd - 1; i >= 0; i--) {
                 dma_addr_t addr = ppgtt->pd_dma_addr[i];
 
-                ret = gen8_write_pdp(ring, i, addr, synchronous);
+                ret = gen8_write_pdp(ring, i, addr);
                 if (ret)
                         return ret;
         }
@@ -708,29 +700,10 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
 }
 
 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
-                         struct intel_engine_cs *ring,
-                         bool synchronous)
+                         struct intel_engine_cs *ring)
 {
-        struct drm_device *dev = ppgtt->base.dev;
-        struct drm_i915_private *dev_priv = dev->dev_private;
         int ret;
 
-        /* If we're in reset, we can assume the GPU is sufficiently idle to
-         * manually frob these bits. Ideally we could use the ring functions,
-         * except our error handling makes it quite difficult (can't use
-         * intel_ring_begin, ring->flush, or intel_ring_advance)
-         *
-         * FIXME: We should try not to special case reset
-         */
-        if (synchronous ||
-            i915_reset_in_progress(&dev_priv->gpu_error)) {
-                WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
-                I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
-                I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
-                POSTING_READ(RING_PP_DIR_BASE(ring));
-                return 0;
-        }
-
         /* NB: TLBs must be flushed and invalidated before a switch */
         ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
         if (ret)
@@ -752,29 +725,10 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
 }
 
 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
-                          struct intel_engine_cs *ring,
-                          bool synchronous)
+                          struct intel_engine_cs *ring)
 {
-        struct drm_device *dev = ppgtt->base.dev;
-        struct drm_i915_private *dev_priv = dev->dev_private;
         int ret;
 
-        /* If we're in reset, we can assume the GPU is sufficiently idle to
-         * manually frob these bits. Ideally we could use the ring functions,
-         * except our error handling makes it quite difficult (can't use
-         * intel_ring_begin, ring->flush, or intel_ring_advance)
-         *
-         * FIXME: We should try not to special case reset
-         */
-        if (synchronous ||
-            i915_reset_in_progress(&dev_priv->gpu_error)) {
-                WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
-                I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
-                I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
-                POSTING_READ(RING_PP_DIR_BASE(ring));
-                return 0;
-        }
-
         /* NB: TLBs must be flushed and invalidated before a switch */
         ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
         if (ret)
@@ -803,14 +757,11 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
 }
 
 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
-                          struct intel_engine_cs *ring,
-                          bool synchronous)
+                          struct intel_engine_cs *ring)
 {
         struct drm_device *dev = ppgtt->base.dev;
         struct drm_i915_private *dev_priv = dev->dev_private;
 
-        if (!synchronous)
-                return 0;
-
         I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
         I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
@@ -1189,7 +1140,7 @@ int i915_ppgtt_init_hw(struct drm_device *dev)
 
         if (ppgtt) {
                 for_each_ring(ring, dev_priv, i) {
-                        ret = ppgtt->switch_mm(ppgtt, ring, true);
+                        ret = ppgtt->switch_mm(ppgtt, ring);
                         if (ret != 0)
                                 return ret;
                 }
...
@@ -264,8 +264,7 @@ struct i915_hw_ppgtt {
 
         int (*enable)(struct i915_hw_ppgtt *ppgtt);
         int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
-                         struct intel_engine_cs *ring,
-                         bool synchronous);
+                         struct intel_engine_cs *ring);
         void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
 };
...
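
With the synchronous parameter gone, every caller of the switch_mm() hook, whether on driver load, resume or reset, goes through the same ring-based path. A tiny sketch of the narrowed hook signature, using hypothetical *_model types rather than the driver's i915_hw_ppgtt:

#include <stdio.h>

/* Hypothetical miniatures of the types involved; not the driver headers. */
struct engine_model { const char *name; };

struct ppgtt_model {
        /* After this patch the hook takes no "synchronous" flag: there is a
         * single path for load, resume and reset alike. */
        int (*switch_mm)(struct ppgtt_model *ppgtt, struct engine_model *ring);
};

static int switch_mm_model(struct ppgtt_model *ppgtt, struct engine_model *ring)
{
        /* ... flush, then load the page-directory registers via the ring ... */
        printf("switching mm on %s via ring commands\n", ring->name);
        return 0;
}

int main(void)
{
        struct ppgtt_model ppgtt = { .switch_mm = switch_mm_model };
        struct engine_model rcs = { "render ring" };

        /* The same call is made from driver load, resume and gpu reset. */
        return ppgtt.switch_mm(&ppgtt, &rcs);
}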