Commit 902eb748 authored by Chris Wilson

drm/i915/gt: Tidy up full-ppgtt on Ivybridge

With a couple more memory barriers dotted around the place we can
significantly improve the MTBF (mean time between failures) on Ivybridge.
This still doesn't really help Haswell, though.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Acked-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191216142409.2605211-1-chris@chris-wilson.co.uk
parent 0a9a5532
...@@ -362,6 +362,12 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode) ...@@ -362,6 +362,12 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
*/ */
flags |= PIPE_CONTROL_CS_STALL; flags |= PIPE_CONTROL_CS_STALL;
/*
* CS_STALL suggests at least a post-sync write.
*/
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
/* Just flush everything. Experiments have shown that reducing the /* Just flush everything. Experiments have shown that reducing the
* number of bits based on the write domains has little performance * number of bits based on the write domains has little performance
* impact. * impact.
...@@ -380,13 +386,6 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode) ...@@ -380,13 +386,6 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
/*
* TLB invalidate requires a post-sync write.
*/
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
/* Workaround: we must issue a pipe_control with CS-stall bit /* Workaround: we must issue a pipe_control with CS-stall bit
* set before a pipe_control command that has the state cache * set before a pipe_control command that has the state cache
...@@ -1371,50 +1370,26 @@ static int load_pd_dir(struct i915_request *rq, ...@@ -1371,50 +1370,26 @@ static int load_pd_dir(struct i915_request *rq,
const struct intel_engine_cs * const engine = rq->engine; const struct intel_engine_cs * const engine = rq->engine;
u32 *cs; u32 *cs;
cs = intel_ring_begin(rq, 12); cs = intel_ring_begin(rq, 10);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
*cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = MI_LOAD_REGISTER_IMM(3);
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base)); *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
*cs++ = valid; *cs++ = valid;
*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
*cs++ = intel_gt_scratch_offset(rq->engine->gt,
INTEL_GT_SCRATCH_FIELD_DEFAULT);
*cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10; *cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
/* Stall until the page table load is complete? */ /* Stall until the page table load is complete? */
*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
*cs++ = intel_gt_scratch_offset(rq->engine->gt, *cs++ = intel_gt_scratch_offset(engine->gt,
INTEL_GT_SCRATCH_FIELD_DEFAULT); INTEL_GT_SCRATCH_FIELD_DEFAULT);
intel_ring_advance(rq, cs); intel_ring_advance(rq, cs);
return rq->engine->emit_flush(rq, EMIT_FLUSH);
}
static int flush_tlb(struct i915_request *rq)
{
const struct intel_engine_cs * const engine = rq->engine;
u32 *cs;
cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs))
return PTR_ERR(cs);
*cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
*cs++ = MI_NOOP;
intel_ring_advance(rq, cs);
return 0; return 0;
} }
...@@ -1590,52 +1565,49 @@ static int remap_l3(struct i915_request *rq) ...@@ -1590,52 +1565,49 @@ static int remap_l3(struct i915_request *rq)
return 0; return 0;
} }
static int switch_context(struct i915_request *rq) static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
{ {
struct intel_context *ce = rq->hw_context;
struct i915_address_space *vm = vm_alias(ce);
u32 hw_flags = 0;
int ret; int ret;
GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); if (!vm)
return 0;
if (vm) { ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
/* if (ret)
* Not only do we need a full barrier (post-sync write) after return ret;
* invalidating the TLBs, but we need to wait a little bit
* longer. Whether this is merely delaying us, or the
* subsequent flush is a key part of serialising with the
* post-sync op, this extra pass appears vital before a
* mm switch!
*/
ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
if (ret)
return ret;
ret = flush_tlb(rq); /*
if (ret) * Not only do we need a full barrier (post-sync write) after
return ret; * invalidating the TLBs, but we need to wait a little bit
* longer. Whether this is merely delaying us, or the
* subsequent flush is a key part of serialising with the
* post-sync op, this extra pass appears vital before a
* mm switch!
*/
ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
if (ret)
return ret;
ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), 0); return rq->engine->emit_flush(rq, EMIT_FLUSH);
if (ret) }
return ret;
ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G); static int switch_context(struct i915_request *rq)
if (ret) {
return ret; struct intel_context *ce = rq->hw_context;
int ret;
ret = flush_tlb(rq); GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
if (ret)
return ret;
ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); ret = switch_mm(rq, vm_alias(ce));
if (ret) if (ret)
return ret; return ret;
}
if (ce->state) { if (ce->state) {
u32 hw_flags;
GEM_BUG_ON(rq->engine->id != RCS0); GEM_BUG_ON(rq->engine->id != RCS0);
hw_flags = 0;
if (!test_bit(CONTEXT_VALID_BIT, &ce->flags)) if (!test_bit(CONTEXT_VALID_BIT, &ce->flags))
hw_flags = MI_RESTORE_INHIBIT; hw_flags = MI_RESTORE_INHIBIT;
......
...@@ -1709,8 +1709,10 @@ static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end) ...@@ -1709,8 +1709,10 @@ static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
gen6_for_each_pde(pt, pd, start, end, pde) gen6_for_each_pde(pt, pd, start, end, pde)
gen6_write_pde(ppgtt, pde, pt); gen6_write_pde(ppgtt, pde, pt);
mb();
ioread32(ppgtt->pd_addr + pde - 1); ioread32(ppgtt->pd_addr + pde - 1);
gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt); gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
mb();
mutex_unlock(&ppgtt->flush); mutex_unlock(&ppgtt->flush);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment