Commit a6f0f9cf authored by Alan Previn's avatar Alan Previn Committed by Lucas De Marchi

drm/i915/guc: Plumb GuC-capture into gpu_coredump

Add a flags parameter through all of the coredump creation
functions. Add a bitmask flag to indicate if the top
level gpu_coredump event is triggered in response to
a GuC context reset notification.

Using that flag, ensure all coredump functions that
read or print mmio-register values related to work submission
or command-streamer engines are skipped and replaced with
a calls guc-capture module equivalent functions to retrieve
or print the register dump.

While here, split out display related register reading
and printing into its own function that is called agnostic
to whether GuC had triggered the reset.

For now, introduce an empty printing function that can
filled in on a subsequent patch just to handle formatting.
Signed-off-by: default avatarAlan Previn <alan.previn.teres.alexis@intel.com>
Reviewed-by: default avatarUmesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: default avatarLucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220321164527.2500062-13-alan.previn.teres.alexis@intel.com
parent 247f8071
......@@ -2229,11 +2229,11 @@ static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
if (!cap->error)
goto err_cap;
cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp, CORE_DUMP_FLAG_NONE);
if (!cap->error->gt)
goto err_gpu;
cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp, CORE_DUMP_FLAG_NONE);
if (!cap->error->gt->engine)
goto err_gt;
......
......@@ -1318,7 +1318,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
engine_mask &= gt->info.engine_mask;
if (flags & I915_ERROR_CAPTURE) {
i915_capture_error_state(gt, engine_mask);
i915_capture_error_state(gt, engine_mask, CORE_DUMP_FLAG_NONE);
intel_gt_clear_error_registers(gt, engine_mask);
}
......
......@@ -10,6 +10,7 @@
#include "gt/intel_engine_regs.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_lrc.h"
#include "guc_capture_fwif.h"
#include "intel_guc_capture.h"
#include "intel_guc_fwif.h"
......@@ -755,6 +756,18 @@ intel_guc_capture_output_min_size_est(struct intel_guc *guc)
* data from GuC and then it's added into guc->capture->outlist linked
* list. This list is used for matchup and printout by i915_gpu_coredump
* and err_print_gt, (when user invokes the error capture sysfs).
*
* GUC --> notify context reset:
* -----------------------------
* --> G2H CONTEXT RESET
* L--> guc_handle_context_reset --> i915_capture_error_state
* L--> i915_gpu_coredump(..IS_GUC_CAPTURE) --> gt_record_engines
* --> capture_engine(..IS_GUC_CAPTURE)
* L--> intel_guc_capture_get_matching_node is where
* detach C from internal linked list and add it into
* intel_engine_coredump struct (if the context and
* engine of the event notification matches a node
* in the link list).
*/
static int guc_capture_buf_cnt(struct __guc_capture_bufstate *buf)
......@@ -1370,6 +1383,63 @@ static void __guc_capture_process_output(struct intel_guc *guc)
__guc_capture_flushlog_complete(guc);
}
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *ebuf,
const struct intel_engine_coredump *ee)
{
return 0;
}
#endif //CONFIG_DRM_I915_CAPTURE_ERROR
void intel_guc_capture_free_node(struct intel_engine_coredump *ee)
{
if (!ee || !ee->guc_capture_node)
return;
guc_capture_add_node_to_cachelist(ee->capture, ee->guc_capture_node);
ee->capture = NULL;
ee->guc_capture_node = NULL;
}
void intel_guc_capture_get_matching_node(struct intel_gt *gt,
struct intel_engine_coredump *ee,
struct intel_context *ce)
{
struct __guc_capture_parsed_output *n, *ntmp;
struct drm_i915_private *i915;
struct intel_guc *guc;
if (!gt || !ee || !ce)
return;
i915 = gt->i915;
guc = &gt->uc.guc;
if (!guc->capture)
return;
GEM_BUG_ON(ee->guc_capture_node);
/*
* Look for a matching GuC reported error capture node from
* the internal output link-list based on lrca, guc-id and engine
* identification.
*/
list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) {
if (n->eng_inst == GUC_ID_TO_ENGINE_INSTANCE(ee->engine->guc_id) &&
n->eng_class == GUC_ID_TO_ENGINE_CLASS(ee->engine->guc_id) &&
n->guc_id && n->guc_id == ce->guc_id.id &&
(n->lrca & CTX_GTT_ADDRESS_MASK) && (n->lrca & CTX_GTT_ADDRESS_MASK) ==
(ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) {
list_del(&n->link);
ee->guc_capture_node = n;
ee->capture = guc->capture;
return;
}
}
drm_dbg(&i915->drm, "GuC capture can't match ee to node\n");
}
void intel_guc_capture_process(struct intel_guc *guc)
{
if (guc->capture)
......
......@@ -8,9 +8,18 @@
#include <linux/types.h>
struct drm_i915_error_state_buf;
struct guc_gt_system_info;
struct intel_context;
struct intel_engine_coredump;
struct intel_gt;
struct intel_guc;
void intel_guc_capture_free_node(struct intel_engine_coredump *ee);
int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *m,
const struct intel_engine_coredump *ee);
void intel_guc_capture_get_matching_node(struct intel_gt *gt, struct intel_engine_coredump *ee,
struct intel_context *ce);
void intel_guc_capture_process(struct intel_guc *guc);
int intel_guc_capture_output_min_size_est(struct intel_guc *guc);
int intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
......
......@@ -4031,7 +4031,7 @@ static void capture_error_state(struct intel_guc *guc,
intel_engine_set_hung_context(engine, ce);
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
i915_capture_error_state(gt, engine->mask);
i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE);
atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]);
}
......
......@@ -307,7 +307,7 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
gpu = NULL;
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES);
gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES, CORE_DUMP_FLAG_NONE);
if (IS_ERR(gpu))
return PTR_ERR(gpu);
......
This diff is collapsed.
......@@ -53,6 +53,8 @@ struct i915_request_coredump {
struct i915_sched_attr sched_attr;
};
struct __guc_capture_parsed_output;
struct intel_engine_coredump {
const struct intel_engine_cs *engine;
......@@ -84,6 +86,10 @@ struct intel_engine_coredump {
u32 rc_psmi; /* sleep state */
struct intel_instdone instdone;
/* GuC matched capture-lists info */
struct intel_guc_state_capture *capture;
struct __guc_capture_parsed_output *guc_capture_node;
struct i915_gem_context_coredump {
char comm[TASK_COMM_LEN];
......@@ -124,7 +130,6 @@ struct intel_gt_coredump {
u32 pgtbl_er;
u32 ier;
u32 gtier[6], ngtier;
u32 derrmr;
u32 forcewake;
u32 error; /* gen6+ */
u32 err_int; /* gen7 */
......@@ -137,9 +142,12 @@ struct intel_gt_coredump {
u32 gfx_mode;
u32 gtt_cache;
u32 aux_err; /* gen12 */
u32 sfc_done[I915_MAX_SFC]; /* gen12 */
u32 gam_done; /* gen12 */
/* Display related */
u32 derrmr;
u32 sfc_done[I915_MAX_SFC]; /* gen12 */
u32 nfence;
u64 fence[I915_MAX_NUM_FENCES];
......@@ -149,6 +157,7 @@ struct intel_gt_coredump {
struct intel_uc_fw guc_fw;
struct intel_uc_fw huc_fw;
struct i915_vma_coredump *guc_log;
bool is_guc_capture;
} *uc;
struct intel_gt_coredump *next;
......@@ -221,24 +230,27 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
return atomic_read(&error->reset_engine_count[engine->uabi_class]);
}
#define CORE_DUMP_FLAG_NONE 0x0
#define CORE_DUMP_FLAG_IS_GUC_CAPTURE BIT(0)
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
__printf(2, 3)
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt,
intel_engine_mask_t engine_mask);
intel_engine_mask_t engine_mask, u32 dump_flags);
void i915_capture_error_state(struct intel_gt *gt,
intel_engine_mask_t engine_mask);
intel_engine_mask_t engine_mask, u32 dump_flags);
struct i915_gpu_coredump *
i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
struct intel_gt_coredump *
intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);
intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags);
struct intel_engine_coredump *
intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags);
struct intel_engine_capture_vma *
intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
......@@ -282,7 +294,7 @@ void i915_disable_error_state(struct drm_i915_private *i915, int err);
#else
static inline void
i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask)
i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags)
{
}
......@@ -293,13 +305,13 @@ i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
}
static inline struct intel_gt_coredump *
intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags)
{
return NULL;
}
static inline struct intel_engine_coredump *
intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags)
{
return NULL;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment