Commit 7dc56af5 authored by Chris Wilson

drm/i915/selftests: Verify the LRC register layout between init and HW

Before we submit the first context to HW, we need to construct a valid
image of the register state. This layout is defined by the HW and should
match the layout generated by HW when it saves the context image.
Asserting that the two layouts are equivalent should help avoid any
undefined behaviour and verify that we haven't missed anything important!

Of course, having insisted that the initial register state within the
LRC should match that returned by HW, we need to ensure that it does.

v2: Drop the RELATIVE_MMIO flag from gen11, we ignore it for
constructing the lrc image.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190924145950.3011-1-chris@chris-wilson.co.uk
parent 1b74d467
......@@ -1115,7 +1115,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
offset = i915_ggtt_offset(ce->state) +
LRC_STATE_PN * PAGE_SIZE +
(CTX_R_PWR_CLK_STATE + 1) * 4;
CTX_R_PWR_CLK_STATE * 4;
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = lower_32_bits(offset);
......
This diff is collapsed.
......@@ -10,60 +10,40 @@
#include <linux/types.h>
/*
 * Dword indices into the logical ring context register-state image.
 *
 * NOTE(review): several CTX_* names below are defined twice in this listing,
 * first as a bare index (e.g. CTX_CONTEXT_CONTROL 0x02) and later as that
 * index plus one (e.g. (0x02 + 1)). This looks like the removed and added
 * lines of a diff shown together; confirm which set belongs to the tree in
 * use before relying on either.
 */
/* GEN8 to GEN11 Reg State Context */
/* First set: bare indices, including the CTX_LRI_HEADER_* and CTX_END slots. */
#define CTX_LRI_HEADER_0 0x01
#define CTX_CONTEXT_CONTROL 0x02
#define CTX_RING_HEAD 0x04
#define CTX_RING_TAIL 0x06
#define CTX_RING_BUFFER_START 0x08
#define CTX_RING_BUFFER_CONTROL 0x0a
#define CTX_BB_HEAD_U 0x0c
#define CTX_BB_HEAD_L 0x0e
#define CTX_BB_STATE 0x10
#define CTX_SECOND_BB_HEAD_U 0x12
#define CTX_SECOND_BB_HEAD_L 0x14
#define CTX_SECOND_BB_STATE 0x16
#define CTX_BB_PER_CTX_PTR 0x18
#define CTX_RCS_INDIRECT_CTX 0x1a
#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c
#define CTX_LRI_HEADER_1 0x21
#define CTX_CTX_TIMESTAMP 0x22
#define CTX_PDP3_UDW 0x24
#define CTX_PDP3_LDW 0x26
#define CTX_PDP2_UDW 0x28
#define CTX_PDP2_LDW 0x2a
#define CTX_PDP1_UDW 0x2c
#define CTX_PDP1_LDW 0x2e
#define CTX_PDP0_UDW 0x30
#define CTX_PDP0_LDW 0x32
#define CTX_LRI_HEADER_2 0x41
#define CTX_R_PWR_CLK_STATE 0x42
#define CTX_END 0x44
/*
 * Second set: a subset of the names above, each one dword higher than its
 * first definition.
 */
#define CTX_CONTEXT_CONTROL (0x02 + 1)
#define CTX_RING_HEAD (0x04 + 1)
#define CTX_RING_TAIL (0x06 + 1)
#define CTX_RING_BUFFER_START (0x08 + 1)
#define CTX_RING_BUFFER_CONTROL (0x0a + 1)
#define CTX_BB_STATE (0x10 + 1)
#define CTX_BB_PER_CTX_PTR (0x18 + 1)
#define CTX_PDP3_UDW (0x24 + 1)
#define CTX_PDP3_LDW (0x26 + 1)
#define CTX_PDP2_UDW (0x28 + 1)
#define CTX_PDP2_LDW (0x2a + 1)
#define CTX_PDP1_UDW (0x2c + 1)
#define CTX_PDP1_LDW (0x2e + 1)
#define CTX_PDP0_UDW (0x30 + 1)
#define CTX_PDP0_LDW (0x32 + 1)
#define CTX_R_PWR_CLK_STATE (0x42 + 1)
#define GEN9_CTX_RING_MI_MODE 0x54
/* GEN12+ Reg State Context */
/*
 * NOTE(review): GEN12_CTX_BB_PER_CTX_PTR is defined again further down this
 * listing as (0x12 + 1); confirm which value is current.
 */
#define GEN12_CTX_BB_PER_CTX_PTR 0x12
#define GEN12_CTX_LRI_HEADER_3 0x41
/*
 * Write one (register offset, value) pair into a register-state image:
 * the MMIO offset of @reg is stored at dword @pos and @val at dword
 * @pos + 1. @reg_state and @pos are each evaluated exactly once.
 */
#define CTX_REG(reg_state, pos, reg, val) do { \
	u32 * const regs__ = (reg_state); \
	const u32 at__ = (pos); \
	regs__[at__] = i915_mmio_reg_offset(reg); \
	regs__[at__ + 1] = (val); \
} while (0)
/*
 * NOTE(review): second definition of GEN12_CTX_BB_PER_CTX_PTR in this
 * listing (it also appears earlier as 0x12); confirm which is current.
 */
#define GEN12_CTX_BB_PER_CTX_PTR (0x12 + 1)
/*
 * Store the DMA address of page directory @n of @ppgtt, as returned by
 * i915_page_dir_dma_addr(), into the CTX_PDP<n>_UDW (upper 32 bits) and
 * CTX_PDP<n>_LDW (lower 32 bits) slots of @reg_state. @reg_state is
 * evaluated once.
 *
 * NOTE(review): as listed, the body stores each half twice, at the named
 * index and at the named index + 1. That looks like the removed and added
 * lines of a diff shown together; confirm which pair of stores matches the
 * CTX_PDP* definitions in use.
 */
#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
u32 *reg_state__ = (reg_state); \
const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \
(reg_state__)[CTX_PDP ## n ## _UDW + 1] = upper_32_bits(addr__); \
(reg_state__)[CTX_PDP ## n ## _LDW + 1] = lower_32_bits(addr__); \
(reg_state__)[CTX_PDP ## n ## _UDW] = upper_32_bits(addr__); \
(reg_state__)[CTX_PDP ## n ## _LDW] = lower_32_bits(addr__); \
} while (0)
/*
 * Store the DMA address px_dma(ppgtt->pd) into the CTX_PDP0_UDW (upper
 * 32 bits) and CTX_PDP0_LDW (lower 32 bits) slots of @reg_state.
 * @reg_state is evaluated once; @ppgtt is used unparenthesised, so pass a
 * plain identifier or an already-parenthesised expression.
 *
 * NOTE(review): as listed, the body stores each half twice, at the named
 * index and at the named index + 1. That looks like the removed and added
 * lines of a diff shown together; confirm which pair of stores matches the
 * CTX_PDP0_* definitions in use.
 */
#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
u32 *reg_state__ = (reg_state); \
const u64 addr__ = px_dma(ppgtt->pd); \
(reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \
(reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \
(reg_state__)[CTX_PDP0_UDW] = upper_32_bits(addr__); \
(reg_state__)[CTX_PDP0_LDW] = lower_32_bits(addr__); \
} while (0)
#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
......
......@@ -2201,3 +2201,145 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
return i915_live_subtests(tests, i915);
}
/*
 * hexdump - log a buffer as rows of hexadecimal words
 * @buf: start of the data to dump
 * @len: number of bytes to dump
 *
 * Each row covers up to 8 * sizeof(u32) bytes, formatted in u32-sized
 * groups by hex_dump_to_buffer() and prefixed with the row's byte offset.
 * Consecutive rows identical to the last printed row are collapsed into a
 * single "*" line.
 */
static void hexdump(const void *buf, size_t len)
{
	const size_t rowsize = 8 * sizeof(u32);
	const void *prev = NULL;
	bool skip = false;
	size_t pos;

	for (pos = 0; pos < len; pos += rowsize) {
		const void *row = buf + pos;
		const size_t remain = len - pos;
		char line[128];

		/*
		 * Only a complete row can repeat the previous one. Comparing
		 * rowsize bytes of a short final row would read beyond
		 * buf + len, so such a row is always printed.
		 */
		if (prev && remain >= rowsize && !memcmp(prev, row, rowsize)) {
			if (!skip) {
				pr_info("*\n");
				skip = true;
			}
			continue;
		}

		/* Warn once if the formatted row did not fit in line[]. */
		WARN_ON_ONCE(hex_dump_to_buffer(row, remain,
						rowsize, sizeof(u32),
						line, sizeof(line),
						false) >= sizeof(line));
		pr_info("[%04zx] %s\n", pos, line);

		prev = row;
		skip = false;
	}
}
/*
 * live_lrc_layout - compare the SW-built register state against the HW image
 * @arg: the struct intel_gt under test
 *
 * For every engine that has a default_state object, build a register state
 * with execlists_init_reg_state() in a scratch page and walk it in lockstep
 * with the register-state page of the engine's default_state. Each LRI
 * command header and each register offset must match; register values are
 * skipped. On a mismatch both images are dumped to the log.
 *
 * Returns 0 on success, -ENOMEM if the scratch page cannot be allocated,
 * the error from i915_gem_object_pin_map() if mapping fails, or -EINVAL on
 * the first layout mismatch.
 */
static int live_lrc_layout(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 *mem;
	int err;

	/*
	 * Check the registers offsets we use to create the initial reg state
	 * match the layout saved by HW.
	 */

	mem = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!mem)
		return -ENOMEM;

	err = 0;
	for_each_engine(engine, gt->i915, id) {
		u32 *hw, *lrc;
		int dw;

		if (!engine->default_state)
			continue;

		hw = i915_gem_object_pin_map(engine->default_state,
					     I915_MAP_WB);
		if (IS_ERR(hw)) {
			err = PTR_ERR(hw);
			break;
		}
		/* Step to the register-state page within the mapped image. */
		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);

		lrc = memset(mem, 0, PAGE_SIZE);
		execlists_init_reg_state(lrc,
					 engine->kernel_context,
					 engine,
					 engine->kernel_context->ring,
					 true);

		/*
		 * NOTE(review): the walk ends only on an error or when the SW
		 * image holds MI_BATCH_BUFFER_END at dw; dw itself is not
		 * bounded by the size of the scratch page.
		 */
		dw = 0;
		do {
			u32 lri = hw[dw];

			/* Zero dwords in the HW image are skipped. */
			if (lri == 0) {
				dw++;
				continue;
			}

			if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
				       engine->name, dw, lri);
				err = -EINVAL;
				break;
			}

			if (lrc[dw] != lri) {
				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
				       engine->name, dw, lri, lrc[dw]);
				err = -EINVAL;
				break;
			}

			/*
			 * (header & 0x7f) + 1 is used as the count of dwords
			 * following the header, consumed below two at a time
			 * as (register offset, value) pairs.
			 */
			lri &= 0x7f;
			lri++;
			dw++;

			while (lri) {
				if (hw[dw] != lrc[dw]) {
					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
					       engine->name, dw, hw[dw], lrc[dw]);
					err = -EINVAL;
					break;
				}

				/*
				 * Skip over the actual register value as we
				 * expect that to differ.
				 */
				dw += 2;
				lri -= 2;
			}
			/*
			 * The break above only leaves the inner loop, so the
			 * condition below must test err as well; otherwise the
			 * walk would resume on a register offset and report a
			 * bogus second failure.
			 */
		} while (!err &&
			 (lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

		if (err) {
			pr_info("%s: HW register image:\n", engine->name);
			hexdump(hw, PAGE_SIZE);

			pr_info("%s: SW register image:\n", engine->name);
			hexdump(lrc, PAGE_SIZE);
		}

		i915_gem_object_unpin_map(engine->default_state);
		if (err)
			break;
	}

	kfree(mem);
	return err;
}
/*
 * Entry point for the LRC live selftests. Runs the subtest table against
 * the device's GT and returns its result, or returns 0 without running
 * anything when the device has no logical ring contexts.
 */
int intel_lrc_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest lrc_tests[] = {
		SUBTEST(live_lrc_layout),
	};

	if (HAS_LOGICAL_RING_CONTEXTS(i915))
		return intel_gt_live_subtests(lrc_tests, &i915->gt);

	return 0;
}
......@@ -1673,10 +1673,8 @@ static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config,
* in the case that the OA unit has been disabled.
*/
static void
gen8_update_reg_state_unlocked(struct i915_perf_stream *stream,
struct intel_context *ce,
u32 *reg_state,
const struct i915_oa_config *oa_config)
gen8_update_reg_state_unlocked(const struct intel_context *ce,
const struct i915_perf_stream *stream)
{
struct drm_i915_private *i915 = ce->engine->i915;
u32 ctx_oactxctrl = i915->perf.ctx_oactxctrl_offset;
......@@ -1691,21 +1689,19 @@ gen8_update_reg_state_unlocked(struct i915_perf_stream *stream,
EU_PERF_CNTL5,
EU_PERF_CNTL6,
};
u32 *reg_state = ce->lrc_reg_state;
int i;
CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
reg_state[ctx_oactxctrl + 1] =
(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
GEN8_OA_COUNTER_RESUME);
GEN8_OA_COUNTER_RESUME;
for (i = 0; i < ARRAY_SIZE(flex_regs); i++) {
CTX_REG(reg_state, ctx_flexeu0 + i * 2, flex_regs[i],
oa_config_flex_reg(oa_config, flex_regs[i]));
}
for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
reg_state[ctx_flexeu0 + i * 2 + 1] =
oa_config_flex_reg(stream->oa_config, flex_regs[i]);
CTX_REG(reg_state,
CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
intel_sseu_make_rpcs(i915, &ce->sseu));
reg_state[CTX_R_PWR_CLK_STATE] = intel_sseu_make_rpcs(i915, &ce->sseu);
}
struct flex {
......@@ -1729,7 +1725,7 @@ gen8_store_flex(struct i915_request *rq,
offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
do {
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = offset + (flex->offset + 1) * sizeof(u32);
*cs++ = offset + flex->offset * sizeof(u32);
*cs++ = 0;
*cs++ = flex->value;
} while (flex++, --count);
......@@ -1863,7 +1859,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
struct drm_i915_private *i915 = stream->dev_priv;
/* The MMIO offsets for Flex EU registers aren't contiguous */
const u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset;
#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N))
#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
struct flex regs[] = {
{
GEN8_R_PWR_CLK_STATE,
......@@ -1871,7 +1867,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
},
{
GEN8_OACTXCONTROL,
i915->perf.ctx_oactxctrl_offset,
i915->perf.ctx_oactxctrl_offset + 1,
((stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
GEN8_OA_COUNTER_RESUME)
......@@ -2299,9 +2295,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
return ret;
}
void i915_oa_init_reg_state(struct intel_engine_cs *engine,
struct intel_context *ce,
u32 *regs)
void i915_oa_init_reg_state(const struct intel_context *ce,
const struct intel_engine_cs *engine)
{
struct i915_perf_stream *stream;
......@@ -2313,7 +2308,7 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
stream = engine->i915->perf.exclusive_stream;
if (stream)
gen8_update_reg_state_unlocked(stream, ce, regs, stream->oa_config);
gen8_update_reg_state_unlocked(ce, stream);
}
/**
......
......@@ -25,8 +25,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
void i915_oa_init_reg_state(struct intel_engine_cs *engine,
struct intel_context *ce,
u32 *reg_state);
void i915_oa_init_reg_state(const struct intel_context *ce,
const struct intel_engine_cs *engine);
#endif /* __I915_PERF_H__ */
......@@ -15,6 +15,7 @@ selftest(workarounds, intel_workarounds_live_selftests)
selftest(gt_engines, intel_engine_live_selftests)
selftest(gt_timelines, intel_timeline_live_selftests)
selftest(gt_contexts, intel_context_live_selftests)
selftest(gt_lrc, intel_lrc_live_selftests)
selftest(requests, i915_request_live_selftests)
selftest(active, i915_active_live_selftests)
selftest(objects, i915_gem_object_live_selftests)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment