Commit 780f262a authored by Chris Wilson

drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker

Since we have a cooperative mode now with a direct reset, we can avoid
the contention on struct_mutex and instead try the lock, then sleep on
the I915_RESET_IN_PROGRESS bit. If the mutex is held and that bit is
cleared, all is fine. Otherwise, we sleep for a bit and try again. In
the worst case we sleep for an extra second waiting for the mutex to be
released (no one touching the GPU is allowed the struct_mutex whilst the
I915_RESET_IN_PROGRESS bit is set). But when we have a direct reset,
this allows us to clean up the reset worker faster.

v2: Remember to call wake_up_bit() after changing the bit (for the
faster wakeup, as promised)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20160909131201.16673-12-chris@chris-wilson.co.uk
parent 221fe799
...@@ -1726,8 +1726,8 @@ int i915_resume_switcheroo(struct drm_device *dev) ...@@ -1726,8 +1726,8 @@ int i915_resume_switcheroo(struct drm_device *dev)
* i915_reset - reset chip after a hang * i915_reset - reset chip after a hang
* @dev: drm device to reset * @dev: drm device to reset
* *
* Reset the chip. Useful if a hang is detected. Returns zero on successful * Reset the chip. Useful if a hang is detected. Marks the device as wedged
* reset or otherwise an error code. * on failure.
* *
* Caller must hold the struct_mutex. * Caller must hold the struct_mutex.
* *
...@@ -1739,7 +1739,7 @@ int i915_resume_switcheroo(struct drm_device *dev) ...@@ -1739,7 +1739,7 @@ int i915_resume_switcheroo(struct drm_device *dev)
* - re-init interrupt state * - re-init interrupt state
* - re-init display * - re-init display
*/ */
int i915_reset(struct drm_i915_private *dev_priv) void i915_reset(struct drm_i915_private *dev_priv)
{ {
struct drm_device *dev = &dev_priv->drm; struct drm_device *dev = &dev_priv->drm;
struct i915_gpu_error *error = &dev_priv->gpu_error; struct i915_gpu_error *error = &dev_priv->gpu_error;
...@@ -1748,7 +1748,7 @@ int i915_reset(struct drm_i915_private *dev_priv) ...@@ -1748,7 +1748,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
lockdep_assert_held(&dev->struct_mutex); lockdep_assert_held(&dev->struct_mutex);
if (!test_and_clear_bit(I915_RESET_IN_PROGRESS, &error->flags)) if (!test_and_clear_bit(I915_RESET_IN_PROGRESS, &error->flags))
return test_bit(I915_WEDGED, &error->flags) ? -EIO : 0; return;
/* Clear any previous failed attempts at recovery. Time to try again. */ /* Clear any previous failed attempts at recovery. Time to try again. */
__clear_bit(I915_WEDGED, &error->flags); __clear_bit(I915_WEDGED, &error->flags);
...@@ -1798,11 +1798,13 @@ int i915_reset(struct drm_i915_private *dev_priv) ...@@ -1798,11 +1798,13 @@ int i915_reset(struct drm_i915_private *dev_priv)
intel_sanitize_gt_powersave(dev_priv); intel_sanitize_gt_powersave(dev_priv);
intel_autoenable_gt_powersave(dev_priv); intel_autoenable_gt_powersave(dev_priv);
return 0; wakeup:
wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS);
return;
error: error:
set_bit(I915_WEDGED, &error->flags); set_bit(I915_WEDGED, &error->flags);
return ret; goto wakeup;
} }
static int i915_pm_suspend(struct device *kdev) static int i915_pm_suspend(struct device *kdev)
......
...@@ -2884,7 +2884,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd, ...@@ -2884,7 +2884,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
#endif #endif
extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask); extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv); extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
extern int i915_reset(struct drm_i915_private *dev_priv); extern void i915_reset(struct drm_i915_private *dev_priv);
extern int intel_guc_reset(struct drm_i915_private *dev_priv); extern int intel_guc_reset(struct drm_i915_private *dev_priv);
extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine); extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv); extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
......
...@@ -2497,7 +2497,6 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv) ...@@ -2497,7 +2497,6 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL }; char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
int ret;
kobject_uevent_env(kobj, KOBJ_CHANGE, error_event); kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
...@@ -2512,24 +2511,30 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv) ...@@ -2512,24 +2511,30 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
* simulated reset via debugs, so get an RPM reference. * simulated reset via debugs, so get an RPM reference.
*/ */
intel_runtime_pm_get(dev_priv); intel_runtime_pm_get(dev_priv);
intel_prepare_reset(dev_priv); intel_prepare_reset(dev_priv);
do {
/* /*
* All state reset _must_ be completed before we update the * All state reset _must_ be completed before we update the
* reset counter, for otherwise waiters might miss the reset * reset counter, for otherwise waiters might miss the reset
* pending state and not properly drop locks, resulting in * pending state and not properly drop locks, resulting in
* deadlocks with the reset work. * deadlocks with the reset work.
*/ */
mutex_lock(&dev_priv->drm.struct_mutex); if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
ret = i915_reset(dev_priv); i915_reset(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex); mutex_unlock(&dev_priv->drm.struct_mutex);
}
intel_finish_reset(dev_priv); /* We need to wait for anyone holding the lock to wakeup */
} while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
I915_RESET_IN_PROGRESS,
TASK_UNINTERRUPTIBLE,
HZ));
intel_finish_reset(dev_priv);
intel_runtime_pm_put(dev_priv); intel_runtime_pm_put(dev_priv);
if (ret == 0) if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
kobject_uevent_env(kobj, kobject_uevent_env(kobj,
KOBJ_CHANGE, reset_done_event); KOBJ_CHANGE, reset_done_event);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment