Commit eb5e7da7, authored by Matthew Brost, committed by John Harrison

drm/i915/guc: Reset implementation for new GuC interface

Add the reset implementation for the new GuC interface. This is the legacy
reset path, which is used when i915 owns the engine hang check. Future
patches will offload the engine hang check to the GuC, but this legacy path
will be maintained as a fallback; it is also required if the GuC dies.

With the new GuC interface it is not possible to reset individual
engines - it is only possible to reset the GPU entirely. This patch
forces an entire chip reset if any engine hangs.

v2:
 (Michal)
  - Check for -EPIPE rather than -EIO (CT deadlock/corrupt check)
v3:
 (John H)
  - Split into a series of smaller patches
v4:
 (John H)
  - Fix typo
  - Add braces around if statements in reset code
v5:
 (Checkpatch)
  - Fix warnings

Cc: John Harrison <john.c.harrison@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: John Harrison <john.c.harrison@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-9-matthew.brost@intel.com
parent d1cee2d3
......@@ -170,8 +170,6 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
if (intel_gt_is_wedged(gt))
intel_gt_unset_wedged(gt);
intel_uc_sanitize(&gt->uc);
for_each_engine(engine, gt, id)
if (engine->reset.prepare)
engine->reset.prepare(engine);
......@@ -187,6 +185,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
__intel_engine_reset(engine, false);
}
intel_uc_reset(&gt->uc, false);
for_each_engine(engine, gt, id)
if (engine->reset.finish)
engine->reset.finish(engine);
......@@ -239,6 +239,8 @@ int intel_gt_resume(struct intel_gt *gt)
goto err_wedged;
}
intel_uc_reset_finish(&gt->uc);
intel_rps_enable(&gt->rps);
intel_llc_enable(&gt->llc);
......
......@@ -832,6 +832,8 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
__intel_engine_reset(engine, stalled_mask & engine->mask);
local_bh_enable();
intel_uc_reset(&gt->uc, true);
intel_ggtt_restore_fences(gt->ggtt);
return err;
......@@ -856,6 +858,8 @@ static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
if (awake & engine->mask)
intel_engine_pm_put(engine);
}
intel_uc_reset_finish(&gt->uc);
}
static void nop_submit_request(struct i915_request *request)
......@@ -909,6 +913,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
for_each_engine(engine, gt, id)
if (engine->reset.cancel)
engine->reset.cancel(engine);
intel_uc_cancel_requests(&gt->uc);
local_bh_enable();
reset_finish(gt, awake);
......@@ -1197,6 +1202,9 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags);
GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags));
if (intel_engine_uses_guc(engine))
return -ENODEV;
if (!intel_engine_pm_get_if_awake(engine))
return 0;
......@@ -1207,13 +1215,10 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
"Resetting %s for %s\n", engine->name, msg);
atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
if (intel_engine_uses_guc(engine))
ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
else
ret = intel_gt_reset_engine(engine);
if (ret) {
/* If we fail here, we expect to fallback to a global reset */
ENGINE_TRACE(engine, "Failed to reset, err: %d\n", ret);
ENGINE_TRACE(engine, "Failed to reset %s, err: %d\n", engine->name, ret);
goto out;
}
......@@ -1347,7 +1352,8 @@ void intel_gt_handle_error(struct intel_gt *gt,
* Try engine reset when available. We fall back to full reset if
* single reset fails.
*/
if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
if (!intel_uc_uses_guc_submission(&gt->uc) &&
intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
local_bh_disable();
for_each_engine_masked(engine, gt, engine_mask, tmp) {
BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
......
......@@ -572,19 +572,6 @@ int intel_guc_suspend(struct intel_guc *guc)
return 0;
}
/**
* intel_guc_reset_engine() - ask GuC to reset an engine
* @guc: intel_guc structure
* @engine: engine to be reset
*/
int intel_guc_reset_engine(struct intel_guc *guc,
struct intel_engine_cs *engine)
{
/* XXX: to be implemented with submission interface rework */
return -ENODEV;
}
/**
* intel_guc_resume() - notify GuC resuming from suspend state
* @guc: the guc
......
......@@ -249,14 +249,16 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask)
int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout);
int intel_guc_reset_engine(struct intel_guc *guc,
struct intel_engine_cs *engine);
int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len);
int intel_guc_sched_done_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len);
void intel_guc_submission_reset_prepare(struct intel_guc *guc);
void intel_guc_submission_reset(struct intel_guc *guc, bool stalled);
void intel_guc_submission_reset_finish(struct intel_guc *guc);
void intel_guc_submission_cancel_requests(struct intel_guc *guc);
void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
#endif
......@@ -565,12 +565,48 @@ void intel_uc_reset_prepare(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
if (!intel_guc_is_ready(guc))
/* Nothing to do if GuC isn't supported */
if (!intel_uc_supports_guc(uc))
return;
/* Firmware expected to be running when this function is called */
if (!intel_guc_is_ready(guc))
goto sanitize;
if (intel_uc_uses_guc_submission(uc))
intel_guc_submission_reset_prepare(guc);
sanitize:
__uc_sanitize(uc);
}
/**
 * intel_uc_reset() - forward a reset to the GuC submission backend
 * @uc: the uc whose embedded GuC is handed to the submission backend
 * @stalled: passed through unchanged to intel_guc_submission_reset()
 *
 * Does nothing unless intel_uc_uses_guc_submission() reports that GuC
 * submission is in use.
 */
void intel_uc_reset(struct intel_uc *uc, bool stalled)
{
	/* Firmware can not be running when this function is called */
	if (!intel_uc_uses_guc_submission(uc))
		return;

	intel_guc_submission_reset(&uc->guc, stalled);
}
/**
 * intel_uc_reset_finish() - complete a reset on the GuC submission backend
 * @uc: the uc whose embedded GuC is handed to the submission backend
 *
 * Calls intel_guc_submission_reset_finish() only when the GuC firmware is
 * reported as running and GuC submission is in use; otherwise it is a no-op.
 */
void intel_uc_reset_finish(struct intel_uc *uc)
{
	struct intel_guc *fw = &uc->guc;

	/* Firmware expected to be running when this function is called */
	if (!intel_guc_is_fw_running(fw))
		return;

	if (!intel_uc_uses_guc_submission(uc))
		return;

	intel_guc_submission_reset_finish(fw);
}
/**
 * intel_uc_cancel_requests() - forward request cancellation to GuC submission
 * @uc: the uc whose embedded GuC is handed to the submission backend
 *
 * Does nothing unless intel_uc_uses_guc_submission() reports that GuC
 * submission is in use.
 */
void intel_uc_cancel_requests(struct intel_uc *uc)
{
	/* Firmware can not be running when this function is called */
	if (!intel_uc_uses_guc_submission(uc))
		return;

	intel_guc_submission_cancel_requests(&uc->guc);
}
void intel_uc_runtime_suspend(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
......
......@@ -37,6 +37,9 @@ void intel_uc_driver_late_release(struct intel_uc *uc);
void intel_uc_driver_remove(struct intel_uc *uc);
void intel_uc_init_mmio(struct intel_uc *uc);
void intel_uc_reset_prepare(struct intel_uc *uc);
void intel_uc_reset(struct intel_uc *uc, bool stalled);
void intel_uc_reset_finish(struct intel_uc *uc);
void intel_uc_cancel_requests(struct intel_uc *uc);
void intel_uc_suspend(struct intel_uc *uc);
void intel_uc_runtime_suspend(struct intel_uc *uc);
int intel_uc_resume(struct intel_uc *uc);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment