Commit d6219e1c authored by Niranjana Vishwanathapura's avatar Niranjana Vishwanathapura Committed by Matt Roper

drm/xe: Add Indirect Ring State support

When Indirect Ring State is enabled, the Ring Buffer state and
Batch Buffer state are context save/restored to/from Indirect
Ring State instead of the LRC. The Indirect Ring State is a 4K
page mapped in global GTT at a 4K aligned address. This address
is programmed in the INDIRECT_RING_STATE register of the
corresponding context's LRC.

v2: Fix kernel-doc, add bspec reference
v3: Fix typo in commit text

Bspec: 67296, 67139
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240507224255.5059-3-niranjana.vishwanathapura@intel.com
parent 85cfc412
...@@ -125,6 +125,7 @@ ...@@ -125,6 +125,7 @@
#define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4) #define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4)
#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED) #define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
#define CTX_CTRL_INDIRECT_RING_STATE_ENABLE REG_BIT(4)
#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3)
#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0)
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#define CTX_RING_TAIL (0x06 + 1) #define CTX_RING_TAIL (0x06 + 1)
#define CTX_RING_START (0x08 + 1) #define CTX_RING_START (0x08 + 1)
#define CTX_RING_CTL (0x0a + 1) #define CTX_RING_CTL (0x0a + 1)
#define CTX_INDIRECT_RING_STATE (0x26 + 1)
#define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_UDW (0x30 + 1)
#define CTX_PDP0_LDW (0x32 + 1) #define CTX_PDP0_LDW (0x32 + 1)
...@@ -23,4 +24,10 @@ ...@@ -23,4 +24,10 @@
#define CTX_INT_SRC_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 3) #define CTX_INT_SRC_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 3)
#define CTX_INT_SRC_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 4) #define CTX_INT_SRC_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 4)
#define INDIRECT_CTX_RING_HEAD (0x02 + 1)
#define INDIRECT_CTX_RING_TAIL (0x04 + 1)
#define INDIRECT_CTX_RING_START (0x06 + 1)
#define INDIRECT_CTX_RING_START_UDW (0x08 + 1)
#define INDIRECT_CTX_RING_CTL (0x0a + 1)
#endif #endif
...@@ -160,7 +160,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) ...@@ -160,7 +160,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
if (q->hwe->class == XE_ENGINE_CLASS_RENDER) if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
/* Big enough to emit all of the context's 3DSTATE */ /* Big enough to emit all of the context's 3DSTATE */
bb = xe_bb_new(gt, xe_lrc_size(gt_to_xe(gt), q->hwe->class), false); bb = xe_bb_new(gt, xe_gt_lrc_size(gt, q->hwe->class), false);
else else
/* Just pick a large BB size */ /* Just pick a large BB size */
bb = xe_bb_new(gt, SZ_4K, false); bb = xe_bb_new(gt, SZ_4K, false);
...@@ -244,7 +244,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) ...@@ -244,7 +244,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
xe_tuning_process_lrc(hwe); xe_tuning_process_lrc(hwe);
default_lrc = drmm_kzalloc(&xe->drm, default_lrc = drmm_kzalloc(&xe->drm,
xe_lrc_size(xe, hwe->class), xe_gt_lrc_size(gt, hwe->class),
GFP_KERNEL); GFP_KERNEL);
if (!default_lrc) if (!default_lrc)
return -ENOMEM; return -ENOMEM;
...@@ -294,7 +294,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) ...@@ -294,7 +294,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
xe_map_memcpy_from(xe, default_lrc, xe_map_memcpy_from(xe, default_lrc,
&q->lrc[0].bo->vmap, &q->lrc[0].bo->vmap,
xe_lrc_pphwsp_offset(&q->lrc[0]), xe_lrc_pphwsp_offset(&q->lrc[0]),
xe_lrc_size(xe, hwe->class)); xe_gt_lrc_size(gt, hwe->class));
gt->default_lrc[hwe->class] = default_lrc; gt->default_lrc[hwe->class] = default_lrc;
put_nop_q: put_nop_q:
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <drm/drm_util.h> #include <drm/drm_util.h>
#include "xe_device.h"
#include "xe_device_types.h" #include "xe_device_types.h"
#include "xe_hw_engine.h" #include "xe_hw_engine.h"
...@@ -58,6 +59,12 @@ struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt, ...@@ -58,6 +59,12 @@ struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
u16 instance, u16 instance,
bool logical); bool logical);
/*
 * xe_gt_has_indirect_ring_state() - check whether this GT should use the
 * Indirect Ring State (a separate 4K GGTT-mapped page holding the ring
 * buffer and batch buffer state instead of the LRC — see commit message).
 *
 * Returns true only when the platform descriptor advertised support
 * (gt->info.has_indirect_ring_state, set in xe_info_init()) AND
 * xe_device_uc_enabled() is true for the owning device.
 *
 * NOTE(review): the uC-enabled condition presumably means indirect ring
 * state is only exercised under GuC submission, not execlists — confirm
 * against the submission backends before relying on this in new callers.
 */
static inline bool xe_gt_has_indirect_ring_state(struct xe_gt *gt)
{
return gt->info.has_indirect_ring_state &&
xe_device_uc_enabled(gt_to_xe(gt));
}
static inline bool xe_gt_is_media_type(struct xe_gt *gt) static inline bool xe_gt_is_media_type(struct xe_gt *gt)
{ {
return gt->info.type == XE_GT_TYPE_MEDIA; return gt->info.type == XE_GT_TYPE_MEDIA;
......
...@@ -110,8 +110,6 @@ struct xe_gt { ...@@ -110,8 +110,6 @@ struct xe_gt {
struct { struct {
/** @info.type: type of GT */ /** @info.type: type of GT */
enum xe_gt_type type; enum xe_gt_type type;
/** @info.id: Unique ID of this GT within the PCI Device */
u8 id;
/** @info.reference_clock: clock frequency */ /** @info.reference_clock: clock frequency */
u32 reference_clock; u32 reference_clock;
/** @info.engine_mask: mask of engines present on GT */ /** @info.engine_mask: mask of engines present on GT */
...@@ -124,6 +122,10 @@ struct xe_gt { ...@@ -124,6 +122,10 @@ struct xe_gt {
u64 __engine_mask; u64 __engine_mask;
/** @info.gmdid: raw GMD_ID value from hardware */ /** @info.gmdid: raw GMD_ID value from hardware */
u32 gmdid; u32 gmdid;
/** @info.id: Unique ID of this GT within the PCI Device */
u8 id;
/** @info.has_indirect_ring_state: GT has indirect ring state support */
u8 has_indirect_ring_state:1;
} info; } info;
/** /**
......
...@@ -267,7 +267,6 @@ static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class) ...@@ -267,7 +267,6 @@ static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class)
static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
{ {
struct xe_device *xe = ads_to_xe(ads);
struct xe_gt *gt = ads_to_gt(ads); struct xe_gt *gt = ads_to_gt(ads);
size_t total_size = 0, alloc_size, real_size; size_t total_size = 0, alloc_size, real_size;
int class; int class;
...@@ -276,7 +275,7 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) ...@@ -276,7 +275,7 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
if (!engine_enable_mask(gt, class)) if (!engine_enable_mask(gt, class))
continue; continue;
real_size = xe_lrc_size(xe, class); real_size = xe_gt_lrc_size(gt, class);
alloc_size = PAGE_ALIGN(real_size); alloc_size = PAGE_ALIGN(real_size);
total_size += alloc_size; total_size += alloc_size;
} }
...@@ -774,7 +773,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads) ...@@ -774,7 +773,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
xe_gt_assert(gt, gt->default_lrc[class]); xe_gt_assert(gt, gt->default_lrc[class]);
real_size = xe_lrc_size(xe, class); real_size = xe_gt_lrc_size(gt, class);
alloc_size = PAGE_ALIGN(real_size); alloc_size = PAGE_ALIGN(real_size);
total_size += alloc_size; total_size += alloc_size;
......
...@@ -677,7 +677,7 @@ static void submit_exec_queue(struct xe_exec_queue *q) ...@@ -677,7 +677,7 @@ static void submit_exec_queue(struct xe_exec_queue *q)
if (xe_exec_queue_is_parallel(q)) if (xe_exec_queue_is_parallel(q))
wq_item_append(q); wq_item_append(q);
else else
xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
return; return;
......
This diff is collapsed.
...@@ -21,14 +21,17 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, ...@@ -21,14 +21,17 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size); struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size);
void xe_lrc_finish(struct xe_lrc *lrc); void xe_lrc_finish(struct xe_lrc *lrc);
size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class); size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class);
u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc); u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc);
void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail);
u32 xe_lrc_ring_tail(struct xe_lrc *lrc);
void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head); void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head);
u32 xe_lrc_ring_head(struct xe_lrc *lrc); u32 xe_lrc_ring_head(struct xe_lrc *lrc);
u32 xe_lrc_ring_space(struct xe_lrc *lrc); u32 xe_lrc_ring_space(struct xe_lrc *lrc);
void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size); void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size);
u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc);
u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc); u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc);
u32 *xe_lrc_regs(struct xe_lrc *lrc); u32 *xe_lrc_regs(struct xe_lrc *lrc);
......
...@@ -20,10 +20,14 @@ struct xe_lrc { ...@@ -20,10 +20,14 @@ struct xe_lrc {
*/ */
struct xe_bo *bo; struct xe_bo *bo;
/** @size: size of lrc including any indirect ring state page */
u32 size;
/** @tile: tile which this LRC belongs to */ /** @tile: tile which this LRC belongs to */
struct xe_tile *tile; struct xe_tile *tile;
/** @flags: LRC flags */ /** @flags: LRC flags */
#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1
u32 flags; u32 flags;
/** @ring: submission ring state */ /** @ring: submission ring state */
......
...@@ -661,6 +661,7 @@ static int xe_info_init(struct xe_device *xe, ...@@ -661,6 +661,7 @@ static int xe_info_init(struct xe_device *xe,
gt = tile->primary_gt; gt = tile->primary_gt;
gt->info.id = xe->info.gt_count++; gt->info.id = xe->info.gt_count++;
gt->info.type = XE_GT_TYPE_MAIN; gt->info.type = XE_GT_TYPE_MAIN;
gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state;
gt->info.__engine_mask = graphics_desc->hw_engine_mask; gt->info.__engine_mask = graphics_desc->hw_engine_mask;
if (MEDIA_VER(xe) < 13 && media_desc) if (MEDIA_VER(xe) < 13 && media_desc)
gt->info.__engine_mask |= media_desc->hw_engine_mask; gt->info.__engine_mask |= media_desc->hw_engine_mask;
...@@ -678,6 +679,7 @@ static int xe_info_init(struct xe_device *xe, ...@@ -678,6 +679,7 @@ static int xe_info_init(struct xe_device *xe,
gt = tile->media_gt; gt = tile->media_gt;
gt->info.type = XE_GT_TYPE_MEDIA; gt->info.type = XE_GT_TYPE_MEDIA;
gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state;
gt->info.__engine_mask = media_desc->hw_engine_mask; gt->info.__engine_mask = media_desc->hw_engine_mask;
gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET; gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET;
gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH; gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH;
......
...@@ -27,6 +27,7 @@ struct xe_graphics_desc { ...@@ -27,6 +27,7 @@ struct xe_graphics_desc {
u8 has_asid:1; u8 has_asid:1;
u8 has_atomic_enable_pte_bit:1; u8 has_atomic_enable_pte_bit:1;
u8 has_flat_ccs:1; u8 has_flat_ccs:1;
u8 has_indirect_ring_state:1;
u8 has_range_tlb_invalidation:1; u8 has_range_tlb_invalidation:1;
u8 has_usm:1; u8 has_usm:1;
}; };
...@@ -37,6 +38,8 @@ struct xe_media_desc { ...@@ -37,6 +38,8 @@ struct xe_media_desc {
u8 rel; u8 rel;
u64 hw_engine_mask; /* hardware engines provided by media IP */ u64 hw_engine_mask; /* hardware engines provided by media IP */
u8 has_indirect_ring_state:1;
}; };
struct gmdid_map { struct gmdid_map {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment