Commit 58174462, authored by Mika Kuoppala, committed by Daniel Vetter

drm/i915: Add reason for capture in error state

We capture error state not only when the GPU hangs but also in
other situations, such as interrupt errors and cases where we can
kick things forward without a GPU reset. There will be a log entry
in most of these cases. However, the error state capture might be
the only thing we have if dmesg was not captured. Also, as in the
GEN4 case, an interrupt error can trigger an error state capture
without a log entry, so the exact reason why the capture was made
is hard to decipher.

v2: Split out the error code stuff to a separate patch (Ben)

References: https://bugs.freedesktop.org/show_bug.cgi?id=74193
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
parent cb383002
...@@ -3190,9 +3190,8 @@ i915_wedged_set(void *data, u64 val) ...@@ -3190,9 +3190,8 @@ i915_wedged_set(void *data, u64 val)
{ {
struct drm_device *dev = data; struct drm_device *dev = data;
DRM_INFO("Manually setting wedged to %llu\n", val); i915_handle_error(dev, val,
i915_handle_error(dev, val); "Manually setting wedged to %llu", val);
return 0; return 0;
} }
......
...@@ -2008,7 +2008,9 @@ extern void intel_console_resume(struct work_struct *work); ...@@ -2008,7 +2008,9 @@ extern void intel_console_resume(struct work_struct *work);
/* i915_irq.c */ /* i915_irq.c */
void i915_queue_hangcheck(struct drm_device *dev); void i915_queue_hangcheck(struct drm_device *dev);
void i915_handle_error(struct drm_device *dev, bool wedged); __printf(3, 4)
void i915_handle_error(struct drm_device *dev, bool wedged,
const char *fmt, ...);
void gen6_set_pm_mask(struct drm_i915_private *dev_priv, u32 pm_iir, void gen6_set_pm_mask(struct drm_i915_private *dev_priv, u32 pm_iir,
int new_delay); int new_delay);
...@@ -2449,7 +2451,8 @@ static inline void i915_error_state_buf_release( ...@@ -2449,7 +2451,8 @@ static inline void i915_error_state_buf_release(
{ {
kfree(eb->buf); kfree(eb->buf);
} }
void i915_capture_error_state(struct drm_device *dev); void i915_capture_error_state(struct drm_device *dev, bool wedge,
const char *error_msg);
void i915_error_state_get(struct drm_device *dev, void i915_error_state_get(struct drm_device *dev,
struct i915_error_state_file_priv *error_priv); struct i915_error_state_file_priv *error_priv);
void i915_error_state_put(struct i915_error_state_file_priv *error_priv); void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
......
...@@ -1094,16 +1094,30 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, ...@@ -1094,16 +1094,30 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
} }
static void i915_error_capture_msg(struct drm_device *dev, static void i915_error_capture_msg(struct drm_device *dev,
struct drm_i915_error_state *error) struct drm_i915_error_state *error,
bool wedged,
const char *error_msg)
{ {
struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_private *dev_priv = dev->dev_private;
u32 ecode; u32 ecode;
int ring_id = -1; int ring_id = -1, len;
ecode = i915_error_generate_code(dev_priv, error, &ring_id); ecode = i915_error_generate_code(dev_priv, error, &ring_id);
scnprintf(error->error_msg, sizeof(error->error_msg), len = scnprintf(error->error_msg, sizeof(error->error_msg),
"GPU HANG: ecode %d:0x%08x", ring_id, ecode); "GPU HANG: ecode %d:0x%08x", ring_id, ecode);
if (ring_id != -1 && error->ring[ring_id].pid != -1)
len += scnprintf(error->error_msg + len,
sizeof(error->error_msg) - len,
", in %s [%d]",
error->ring[ring_id].comm,
error->ring[ring_id].pid);
scnprintf(error->error_msg + len, sizeof(error->error_msg) - len,
", reason: %s, action: %s",
error_msg,
wedged ? "reset" : "continue");
} }
/** /**
...@@ -1115,7 +1129,8 @@ static void i915_error_capture_msg(struct drm_device *dev, ...@@ -1115,7 +1129,8 @@ static void i915_error_capture_msg(struct drm_device *dev,
* out a structure which becomes available in debugfs for user level tools * out a structure which becomes available in debugfs for user level tools
* to pick up. * to pick up.
*/ */
void i915_capture_error_state(struct drm_device *dev) void i915_capture_error_state(struct drm_device *dev, bool wedged,
const char *error_msg)
{ {
static bool warned; static bool warned;
struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_private *dev_priv = dev->dev_private;
...@@ -1141,7 +1156,7 @@ void i915_capture_error_state(struct drm_device *dev) ...@@ -1141,7 +1156,7 @@ void i915_capture_error_state(struct drm_device *dev)
error->overlay = intel_overlay_capture_error_state(dev); error->overlay = intel_overlay_capture_error_state(dev);
error->display = intel_display_capture_error_state(dev); error->display = intel_display_capture_error_state(dev);
i915_error_capture_msg(dev, error); i915_error_capture_msg(dev, error, wedged, error_msg);
DRM_INFO("%s\n", error->error_msg); DRM_INFO("%s\n", error->error_msg);
spin_lock_irqsave(&dev_priv->gpu_error.lock, flags); spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
......
...@@ -1297,8 +1297,8 @@ static void snb_gt_irq_handler(struct drm_device *dev, ...@@ -1297,8 +1297,8 @@ static void snb_gt_irq_handler(struct drm_device *dev,
if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
GT_BSD_CS_ERROR_INTERRUPT | GT_BSD_CS_ERROR_INTERRUPT |
GT_RENDER_CS_MASTER_ERROR_INTERRUPT)) { GT_RENDER_CS_MASTER_ERROR_INTERRUPT)) {
DRM_ERROR("GT error interrupt 0x%08x\n", gt_iir); i915_handle_error(dev, false, "GT error interrupt 0x%08x",
i915_handle_error(dev, false); gt_iir);
} }
if (gt_iir & GT_PARITY_ERROR(dev)) if (gt_iir & GT_PARITY_ERROR(dev))
...@@ -1545,8 +1545,9 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) ...@@ -1545,8 +1545,9 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
notify_ring(dev_priv->dev, &dev_priv->ring[VECS]); notify_ring(dev_priv->dev, &dev_priv->ring[VECS]);
if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) { if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) {
DRM_ERROR("VEBOX CS error interrupt 0x%08x\n", pm_iir); i915_handle_error(dev_priv->dev, false,
i915_handle_error(dev_priv->dev, false); "VEBOX CS error interrupt 0x%08x",
pm_iir);
} }
} }
} }
...@@ -2278,11 +2279,18 @@ static void i915_report_and_clear_eir(struct drm_device *dev) ...@@ -2278,11 +2279,18 @@ static void i915_report_and_clear_eir(struct drm_device *dev)
* so userspace knows something bad happened (should trigger collection * so userspace knows something bad happened (should trigger collection
* of a ring dump etc.). * of a ring dump etc.).
*/ */
void i915_handle_error(struct drm_device *dev, bool wedged) void i915_handle_error(struct drm_device *dev, bool wedged,
const char *fmt, ...)
{ {
struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_private *dev_priv = dev->dev_private;
va_list args;
char error_msg[80];
i915_capture_error_state(dev); va_start(args, fmt);
vscnprintf(error_msg, sizeof(error_msg), fmt, args);
va_end(args);
i915_capture_error_state(dev, wedged, error_msg);
i915_report_and_clear_eir(dev); i915_report_and_clear_eir(dev);
if (wedged) { if (wedged) {
...@@ -2585,9 +2593,9 @@ ring_stuck(struct intel_ring_buffer *ring, u32 acthd) ...@@ -2585,9 +2593,9 @@ ring_stuck(struct intel_ring_buffer *ring, u32 acthd)
*/ */
tmp = I915_READ_CTL(ring); tmp = I915_READ_CTL(ring);
if (tmp & RING_WAIT) { if (tmp & RING_WAIT) {
DRM_ERROR("Kicking stuck wait on %s\n", i915_handle_error(dev, false,
"Kicking stuck wait on %s",
ring->name); ring->name);
i915_handle_error(dev, false);
I915_WRITE_CTL(ring, tmp); I915_WRITE_CTL(ring, tmp);
return HANGCHECK_KICK; return HANGCHECK_KICK;
} }
...@@ -2597,9 +2605,9 @@ ring_stuck(struct intel_ring_buffer *ring, u32 acthd) ...@@ -2597,9 +2605,9 @@ ring_stuck(struct intel_ring_buffer *ring, u32 acthd)
default: default:
return HANGCHECK_HUNG; return HANGCHECK_HUNG;
case 1: case 1:
DRM_ERROR("Kicking stuck semaphore on %s\n", i915_handle_error(dev, false,
"Kicking stuck semaphore on %s",
ring->name); ring->name);
i915_handle_error(dev, false);
I915_WRITE_CTL(ring, tmp); I915_WRITE_CTL(ring, tmp);
return HANGCHECK_KICK; return HANGCHECK_KICK;
case 0: case 0:
...@@ -2721,7 +2729,7 @@ static void i915_hangcheck_elapsed(unsigned long data) ...@@ -2721,7 +2729,7 @@ static void i915_hangcheck_elapsed(unsigned long data)
} }
if (rings_hung) if (rings_hung)
return i915_handle_error(dev, true); return i915_handle_error(dev, true, "Ring hung");
if (busy_count) if (busy_count)
/* Reset timer case chip hangs without another request /* Reset timer case chip hangs without another request
...@@ -3338,7 +3346,9 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) ...@@ -3338,7 +3346,9 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
*/ */
spin_lock_irqsave(&dev_priv->irq_lock, irqflags); spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
i915_handle_error(dev, false); i915_handle_error(dev, false,
"Command parser error, iir 0x%08x",
iir);
for_each_pipe(pipe) { for_each_pipe(pipe) {
int reg = PIPESTAT(pipe); int reg = PIPESTAT(pipe);
...@@ -3520,7 +3530,9 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) ...@@ -3520,7 +3530,9 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
*/ */
spin_lock_irqsave(&dev_priv->irq_lock, irqflags); spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
i915_handle_error(dev, false); i915_handle_error(dev, false,
"Command parser error, iir 0x%08x",
iir);
for_each_pipe(pipe) { for_each_pipe(pipe) {
int reg = PIPESTAT(pipe); int reg = PIPESTAT(pipe);
...@@ -3757,7 +3769,9 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) ...@@ -3757,7 +3769,9 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
*/ */
spin_lock_irqsave(&dev_priv->irq_lock, irqflags); spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
i915_handle_error(dev, false); i915_handle_error(dev, false,
"Command parser error, iir 0x%08x",
iir);
for_each_pipe(pipe) { for_each_pipe(pipe) {
int reg = PIPESTAT(pipe); int reg = PIPESTAT(pipe);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment