Commit e1a73a54 authored by Chris Wilson

drm/i915: Measure the required reserved size for request emission

Instead of tediously and fragilely counting up the number of dwords
required to emit the breadcrumb to seal a request, fake a request and
measure it automatically once during engine setup.

The downside is that this requires a fair amount of mocking to create a
proper breadcrumb. Still, should be less error prone in future as the
breadcrumb size fluctuates!
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190125100520.20163-1-chris@chris-wilson.co.uk
parent 8e525cb4
...@@ -604,6 +604,47 @@ static void __intel_context_unpin(struct i915_gem_context *ctx, ...@@ -604,6 +604,47 @@ static void __intel_context_unpin(struct i915_gem_context *ctx,
intel_context_unpin(to_intel_context(ctx, engine)); intel_context_unpin(to_intel_context(ctx, engine));
} }
/*
 * Scratch frame used by measure_breadcrumb_sz(): a stand-in request together
 * with the timeline, ring and command buffer it is wired up to, so that all
 * of them can be allocated and freed as one object.
 */
struct measure_breadcrumb {
	struct i915_request rq;		/* fake request passed to emit_breadcrumb() */
	struct i915_timeline timeline;	/* timeline referenced by both rq and ring */
	struct intel_ring ring;		/* fake ring; its vaddr is pointed at cs[] */
	u32 cs[1024];			/* dword buffer the breadcrumb is emitted into */
};
static int measure_breadcrumb_sz(struct intel_engine_cs *engine)
{
struct measure_breadcrumb *frame;
unsigned int dw;
GEM_BUG_ON(!engine->i915->gt.scratch);
frame = kzalloc(sizeof(*frame), GFP_KERNEL);
if (!frame)
return -ENOMEM;
i915_timeline_init(engine->i915, &frame->timeline, "measure");
INIT_LIST_HEAD(&frame->ring.request_list);
frame->ring.timeline = &frame->timeline;
frame->ring.vaddr = frame->cs;
frame->ring.size = sizeof(frame->cs);
frame->ring.effective_size = frame->ring.size;
intel_ring_update_space(&frame->ring);
frame->rq.i915 = engine->i915;
frame->rq.engine = engine;
frame->rq.ring = &frame->ring;
frame->rq.timeline = &frame->timeline;
dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs;
GEM_BUG_ON(dw != engine->emit_breadcrumb_sz);
i915_timeline_fini(&frame->timeline);
kfree(frame);
return dw;
}
/** /**
* intel_engines_init_common - initialize cengine state which might require hw access * intel_engines_init_common - initialize cengine state which might require hw access
* @engine: Engine to initialize. * @engine: Engine to initialize.
...@@ -657,8 +698,16 @@ int intel_engine_init_common(struct intel_engine_cs *engine) ...@@ -657,8 +698,16 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
if (ret) if (ret)
goto err_breadcrumbs; goto err_breadcrumbs;
ret = measure_breadcrumb_sz(engine);
if (ret < 0)
goto err_status_page;
engine->emit_breadcrumb_sz = ret;
return 0; return 0;
err_status_page:
cleanup_status_page(engine);
err_breadcrumbs: err_breadcrumbs:
intel_engine_fini_breadcrumbs(engine); intel_engine_fini_breadcrumbs(engine);
err_unpin_preempt: err_unpin_preempt:
......
...@@ -2051,15 +2051,17 @@ static int gen8_emit_flush_render(struct i915_request *request, ...@@ -2051,15 +2051,17 @@ static int gen8_emit_flush_render(struct i915_request *request,
* used as a workaround for not being allowed to do lite * used as a workaround for not being allowed to do lite
* restore with HEAD==TAIL (WaIdleLiteRestore). * restore with HEAD==TAIL (WaIdleLiteRestore).
*/ */
static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs) static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
{ {
/* Ensure there's always at least one preemption point per-request. */ /* Ensure there's always at least one preemption point per-request. */
*cs++ = MI_ARB_CHECK; *cs++ = MI_ARB_CHECK;
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
request->wa_tail = intel_ring_offset(request, cs); request->wa_tail = intel_ring_offset(request, cs);
return cs;
} }
static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
{ {
/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
...@@ -2071,11 +2073,11 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) ...@@ -2071,11 +2073,11 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
request->tail = intel_ring_offset(request, cs); request->tail = intel_ring_offset(request, cs);
assert_ring_tail_valid(request->ring, request->tail); assert_ring_tail_valid(request->ring, request->tail);
gen8_emit_wa_tail(request, cs); return gen8_emit_wa_tail(request, cs);
} }
static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) static u32 *gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{ {
/* We're using qword write, seqno should be aligned to 8 bytes. */ /* We're using qword write, seqno should be aligned to 8 bytes. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
...@@ -2095,7 +2097,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) ...@@ -2095,7 +2097,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
request->tail = intel_ring_offset(request, cs); request->tail = intel_ring_offset(request, cs);
assert_ring_tail_valid(request->ring, request->tail); assert_ring_tail_valid(request->ring, request->tail);
gen8_emit_wa_tail(request, cs); return gen8_emit_wa_tail(request, cs);
} }
static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS; static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS;
......
...@@ -299,7 +299,7 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode) ...@@ -299,7 +299,7 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode)
return 0; return 0;
} }
static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{ {
/* First we do the gen6_emit_post_sync_nonzero_flush w/a */ /* First we do the gen6_emit_post_sync_nonzero_flush w/a */
*cs++ = GFX_OP_PIPE_CONTROL(4); *cs++ = GFX_OP_PIPE_CONTROL(4);
...@@ -327,6 +327,8 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) ...@@ -327,6 +327,8 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
rq->tail = intel_ring_offset(rq, cs); rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail); assert_ring_tail_valid(rq->ring, rq->tail);
return cs;
} }
static const int gen6_rcs_emit_breadcrumb_sz = 14; static const int gen6_rcs_emit_breadcrumb_sz = 14;
...@@ -409,7 +411,7 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode) ...@@ -409,7 +411,7 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
return 0; return 0;
} }
static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{ {
*cs++ = GFX_OP_PIPE_CONTROL(4); *cs++ = GFX_OP_PIPE_CONTROL(4);
*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
...@@ -427,10 +429,12 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) ...@@ -427,10 +429,12 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
rq->tail = intel_ring_offset(rq, cs); rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail); assert_ring_tail_valid(rq->ring, rq->tail);
return cs;
} }
static const int gen7_rcs_emit_breadcrumb_sz = 6; static const int gen7_rcs_emit_breadcrumb_sz = 6;
static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{ {
*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
...@@ -439,11 +443,13 @@ static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) ...@@ -439,11 +443,13 @@ static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
rq->tail = intel_ring_offset(rq, cs); rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail); assert_ring_tail_valid(rq->ring, rq->tail);
return cs;
} }
static const int gen6_xcs_emit_breadcrumb_sz = 4; static const int gen6_xcs_emit_breadcrumb_sz = 4;
#define GEN7_XCS_WA 32 #define GEN7_XCS_WA 32
static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{ {
int i; int i;
...@@ -466,6 +472,8 @@ static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) ...@@ -466,6 +472,8 @@ static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
rq->tail = intel_ring_offset(rq, cs); rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail); assert_ring_tail_valid(rq->ring, rq->tail);
return cs;
} }
static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3; static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3;
#undef GEN7_XCS_WA #undef GEN7_XCS_WA
...@@ -861,7 +869,7 @@ static void i9xx_submit_request(struct i915_request *request) ...@@ -861,7 +869,7 @@ static void i9xx_submit_request(struct i915_request *request)
intel_ring_set_tail(request->ring, request->tail)); intel_ring_set_tail(request->ring, request->tail));
} }
static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{ {
*cs++ = MI_FLUSH; *cs++ = MI_FLUSH;
...@@ -874,11 +882,13 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) ...@@ -874,11 +882,13 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
rq->tail = intel_ring_offset(rq, cs); rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail); assert_ring_tail_valid(rq->ring, rq->tail);
return cs;
} }
static const int i9xx_emit_breadcrumb_sz = 6; static const int i9xx_emit_breadcrumb_sz = 6;
#define GEN5_WA_STORES 8 /* must be at least 1! */ #define GEN5_WA_STORES 8 /* must be at least 1! */
static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{ {
int i; int i;
...@@ -895,6 +905,8 @@ static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) ...@@ -895,6 +905,8 @@ static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
rq->tail = intel_ring_offset(rq, cs); rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail); assert_ring_tail_valid(rq->ring, rq->tail);
return cs;
} }
static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2; static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2;
#undef GEN5_WA_STORES #undef GEN5_WA_STORES
......
...@@ -470,7 +470,7 @@ struct intel_engine_cs { ...@@ -470,7 +470,7 @@ struct intel_engine_cs {
unsigned int dispatch_flags); unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1) #define I915_DISPATCH_PINNED BIT(1)
void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs); u32 *(*emit_breadcrumb)(struct i915_request *rq, u32 *cs);
int emit_breadcrumb_sz; int emit_breadcrumb_sz;
/* Pass the request to the hardware queue (e.g. directly into /* Pass the request to the hardware queue (e.g. directly into
......
...@@ -159,9 +159,9 @@ static int mock_emit_flush(struct i915_request *request, ...@@ -159,9 +159,9 @@ static int mock_emit_flush(struct i915_request *request,
return 0; return 0;
} }
static void mock_emit_breadcrumb(struct i915_request *request, static u32 *mock_emit_breadcrumb(struct i915_request *request, u32 *cs)
u32 *flags)
{ {
return cs;
} }
static void mock_submit_request(struct i915_request *request) static void mock_submit_request(struct i915_request *request)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment