Commit c5de70f6 authored by John Harrison's avatar John Harrison

drm/i915/guc: Record CTB info in error logs

When debugging GuC communication issues, it is useful to have the CTB
info available. So add the state and buffer contents to the error
capture log.

Also, add a sub-structure for the GuC specific error capture info as
it is now becoming numerous.
Signed-off-by: default avatarJohn Harrison <John.C.Harrison@Intel.com>
Reviewed-by: default avatarAlan Previn <alan.previn.teres.alexis@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220728022028.2190627-5-John.C.Harrison@Intel.com
parent 368d179a
......@@ -671,6 +671,18 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m,
pdev->subsystem_device);
}
static void err_print_guc_ctb(struct drm_i915_error_state_buf *m,
const char *name,
const struct intel_ctb_coredump *ctb)
{
if (!ctb->size)
return;
err_printf(m, "GuC %s CTB: raw: 0x%08X, 0x%08X/%08X, cached: 0x%08X/%08X, desc = 0x%08X, buf = 0x%08X x 0x%08X\n",
name, ctb->raw_status, ctb->raw_head, ctb->raw_tail,
ctb->head, ctb->tail, ctb->desc_offset, ctb->cmds_offset, ctb->size);
}
static void err_print_uc(struct drm_i915_error_state_buf *m,
const struct intel_uc_coredump *error_uc)
{
......@@ -678,8 +690,12 @@ static void err_print_uc(struct drm_i915_error_state_buf *m,
intel_uc_fw_dump(&error_uc->guc_fw, &p);
intel_uc_fw_dump(&error_uc->huc_fw, &p);
err_printf(m, "GuC timestamp: 0x%08x\n", error_uc->timestamp);
intel_gpu_error_print_vma(m, NULL, error_uc->guc_log);
err_printf(m, "GuC timestamp: 0x%08x\n", error_uc->guc.timestamp);
intel_gpu_error_print_vma(m, NULL, error_uc->guc.vma_log);
err_printf(m, "GuC CTB fence: %d\n", error_uc->guc.last_fence);
err_print_guc_ctb(m, "Send", error_uc->guc.ctb + 0);
err_print_guc_ctb(m, "Recv", error_uc->guc.ctb + 1);
intel_gpu_error_print_vma(m, NULL, error_uc->guc.vma_ctb);
}
static void err_free_sgl(struct scatterlist *sgl)
......@@ -854,7 +870,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
if (error->gt) {
bool print_guc_capture = false;
if (error->gt->uc && error->gt->uc->is_guc_capture)
if (error->gt->uc && error->gt->uc->guc.is_guc_capture)
print_guc_capture = true;
err_print_gt_display(m, error->gt);
......@@ -1009,7 +1025,8 @@ static void cleanup_uc(struct intel_uc_coredump *uc)
{
kfree(uc->guc_fw.path);
kfree(uc->huc_fw.path);
i915_vma_coredump_free(uc->guc_log);
i915_vma_coredump_free(uc->guc.vma_log);
i915_vma_coredump_free(uc->guc.vma_ctb);
kfree(uc);
}
......@@ -1658,6 +1675,23 @@ gt_record_engines(struct intel_gt_coredump *gt,
}
}
static void gt_record_guc_ctb(struct intel_ctb_coredump *saved,
const struct intel_guc_ct_buffer *ctb,
const void *blob_ptr, struct intel_guc *guc)
{
if (!ctb || !ctb->desc)
return;
saved->raw_status = ctb->desc->status;
saved->raw_head = ctb->desc->head;
saved->raw_tail = ctb->desc->tail;
saved->head = ctb->head;
saved->tail = ctb->tail;
saved->size = ctb->size;
saved->desc_offset = ((void *)ctb->desc) - blob_ptr;
saved->cmds_offset = ((void *)ctb->cmds) - blob_ptr;
}
static struct intel_uc_coredump *
gt_record_uc(struct intel_gt_coredump *gt,
struct i915_vma_compress *compress)
......@@ -1684,9 +1718,16 @@ gt_record_uc(struct intel_gt_coredump *gt,
* log times to system times (in conjunction with the error->boottime and
* gt->clock_frequency fields saved elsewhere).
*/
error_uc->timestamp = intel_uncore_read(gt->_gt->uncore, GUCPMTIMESTAMP);
error_uc->guc_log = create_vma_coredump(gt->_gt, uc->guc.log.vma,
"GuC log buffer", compress);
error_uc->guc.timestamp = intel_uncore_read(gt->_gt->uncore, GUCPMTIMESTAMP);
error_uc->guc.vma_log = create_vma_coredump(gt->_gt, uc->guc.log.vma,
"GuC log buffer", compress);
error_uc->guc.vma_ctb = create_vma_coredump(gt->_gt, uc->guc.ct.vma,
"GuC CT buffer", compress);
error_uc->guc.last_fence = uc->guc.ct.requests.last_fence;
gt_record_guc_ctb(error_uc->guc.ctb + 0, &uc->guc.ct.ctbs.send,
uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc);
gt_record_guc_ctb(error_uc->guc.ctb + 1, &uc->guc.ct.ctbs.recv,
uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc);
return error_uc;
}
......@@ -2039,9 +2080,9 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 du
error->gt->uc = gt_record_uc(error->gt, compress);
if (error->gt->uc) {
if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
error->gt->uc->is_guc_capture = true;
error->gt->uc->guc.is_guc_capture = true;
else
GEM_BUG_ON(error->gt->uc->is_guc_capture);
GEM_BUG_ON(error->gt->uc->guc.is_guc_capture);
}
}
......
......@@ -125,6 +125,15 @@ struct intel_engine_coredump {
struct intel_engine_coredump *next;
};
struct intel_ctb_coredump {
u32 raw_head, head;
u32 raw_tail, tail;
u32 raw_status;
u32 desc_offset;
u32 cmds_offset;
u32 size;
};
struct intel_gt_coredump {
const struct intel_gt *_gt;
bool awake;
......@@ -165,9 +174,14 @@ struct intel_gt_coredump {
struct intel_uc_coredump {
struct intel_uc_fw guc_fw;
struct intel_uc_fw huc_fw;
struct i915_vma_coredump *guc_log;
u32 timestamp;
bool is_guc_capture;
struct guc_info {
struct intel_ctb_coredump ctb[2];
struct i915_vma_coredump *vma_ctb;
struct i915_vma_coredump *vma_log;
u32 timestamp;
u16 last_fence;
bool is_guc_capture;
} guc;
} *uc;
struct intel_gt_coredump *next;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment