Commit 866bc2d3 authored by Dave Airlie

Merge tag 'drm-intel-fixes-2020-11-05' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixes

- GVT fixes, including vGPU suspend/resume fixes and a workaround for an APL guest GPU hang
- Fix set-domain's cache coherency (Chris)
- Fixes around breadcrumbs (Chris)
- Fix encoder lookup during the PSR atomic check (Imre)
- Hold onto an explicit ref to i915_vma_work.pinned (Chris)
Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201105173026.GA858446@intel.com
parents 53aa37fb 537457a9
@@ -1754,7 +1754,7 @@ void intel_psr_atomic_check(struct drm_connector *connector,
 		return;
 
 	intel_connector = to_intel_connector(connector);
-	dig_port = enc_to_dig_port(intel_attached_encoder(intel_connector));
+	dig_port = enc_to_dig_port(to_intel_encoder(new_state->best_encoder));
 	if (dev_priv->psr.dp != &dig_port->dp)
 		return;
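For context, the change above stops dereferencing the connector's currently attached encoder (which may be stale or not yet set while an atomic commit is being checked) and instead uses the encoder recorded in the new connector state. A minimal user-space sketch of that lookup preference, with hypothetical struct names standing in for the real drm/i915 types:

```c
#include <stdio.h>
#include <stddef.h>

/* Hypothetical stand-ins for the drm encoder/connector/state types. */
struct encoder { const char *name; };
struct connector_state { struct encoder *best_encoder; };
struct connector { struct encoder *attached; };

/* Prefer the encoder chosen by the new atomic state over the connector's
 * currently attached encoder, which is only reliable outside a commit. */
static struct encoder *lookup_encoder(const struct connector *connector,
				      const struct connector_state *new_state)
{
	(void)connector;	/* connector->attached may lag behind the state */
	return new_state->best_encoder;
}

int main(void)
{
	struct encoder e = { "DDI A" };
	struct connector_state st = { &e };
	struct connector con = { NULL };	/* not (re)attached yet */

	printf("encoder: %s\n", lookup_encoder(&con, &st)->name);
	return 0;
}
```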
@@ -508,21 +508,6 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	if (!obj)
 		return -ENOENT;
 
-	/*
-	 * Already in the desired write domain? Nothing for us to do!
-	 *
-	 * We apply a little bit of cunning here to catch a broader set of
-	 * no-ops. If obj->write_domain is set, we must be in the same
-	 * obj->read_domains, and only that domain. Therefore, if that
-	 * obj->write_domain matches the request read_domains, we are
-	 * already in the same read/write domain and can skip the operation,
-	 * without having to further check the requested write_domain.
-	 */
-	if (READ_ONCE(obj->write_domain) == read_domains) {
-		err = 0;
-		goto out;
-	}
-
 	/*
 	 * Try to flush the object off the GPU without holding the lock.
 	 * We will repeat the flush holding the lock in the normal manner
@@ -560,6 +545,19 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	if (err)
 		goto out;
 
+	/*
+	 * Already in the desired write domain? Nothing for us to do!
+	 *
+	 * We apply a little bit of cunning here to catch a broader set of
+	 * no-ops. If obj->write_domain is set, we must be in the same
+	 * obj->read_domains, and only that domain. Therefore, if that
+	 * obj->write_domain matches the request read_domains, we are
+	 * already in the same read/write domain and can skip the operation,
+	 * without having to further check the requested write_domain.
+	 */
+	if (READ_ONCE(obj->write_domain) == read_domains)
+		goto out_unpin;
+
 	err = i915_gem_object_lock_interruptible(obj, NULL);
 	if (err)
 		goto out_unpin;
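The set-domain change above moves the "already in the desired write domain" fast path so that it only runs after the object's pages have been pinned, and exits through the out_unpin label instead of returning early. A rough user-space model of that "acquire first, then test for a no-op, then unwind" ordering (names are illustrative, not the i915 API):

```c
#include <stdio.h>

struct obj {
	unsigned int write_domain;
	int pin_count;
};

static int pin_pages(struct obj *o) { o->pin_count++; return 0; }
static void unpin_pages(struct obj *o) { o->pin_count--; }

/* Model of the reordered ioctl: pin first so the object cannot change
 * underneath us, then check whether the requested domain is already set. */
static int set_domain(struct obj *o, unsigned int read_domains)
{
	int err = pin_pages(o);
	if (err)
		return err;

	if (o->write_domain == read_domains)
		goto out_unpin;	/* no-op, but still release the pin */

	/* ... lock the object and perform the real domain change here ... */
	o->write_domain = read_domains;

out_unpin:
	unpin_pages(o);
	return 0;
}

int main(void)
{
	struct obj o = { .write_domain = 0x2, .pin_count = 0 };

	set_domain(&o, 0x2);	/* no-op path */
	set_domain(&o, 0x1);	/* real transition */
	printf("domain=%#x pins=%d\n", o.write_domain, o.pin_count);
	return 0;
}
```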
@@ -245,22 +245,14 @@ static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u
 }
 
 static inline u32 *
-__gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1)
+__gen8_emit_write_rcs(u32 *cs, u32 value, u32 offset, u32 flags0, u32 flags1)
 {
-	/* We're using qword write, offset should be aligned to 8 bytes. */
-	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
-
-	/* w/a for post sync ops following a GPGPU operation we
-	 * need a prior CS_STALL, which is emitted by the flush
-	 * following the batch.
-	 */
 	*cs++ = GFX_OP_PIPE_CONTROL(6) | flags0;
-	*cs++ = flags1 | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
-	*cs++ = gtt_offset;
+	*cs++ = flags1 | PIPE_CONTROL_QW_WRITE;
+	*cs++ = offset;
 	*cs++ = 0;
 	*cs++ = value;
-	/* We're thrashing one dword of HWS. */
-	*cs++ = 0;
+	*cs++ = 0; /* We're thrashing one extra dword. */
 
 	return cs;
 }
@@ -268,13 +260,38 @@ __gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 f
 static inline u32*
 gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
 {
-	return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, 0, flags);
+	/* We're using qword write, offset should be aligned to 8 bytes. */
+	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+	return __gen8_emit_write_rcs(cs,
+				     value,
+				     gtt_offset,
+				     0,
+				     flags | PIPE_CONTROL_GLOBAL_GTT_IVB);
 }
 
 static inline u32*
 gen12_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1)
 {
-	return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, flags0, flags1);
+	/* We're using qword write, offset should be aligned to 8 bytes. */
+	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+	return __gen8_emit_write_rcs(cs,
+				     value,
+				     gtt_offset,
+				     flags0,
+				     flags1 | PIPE_CONTROL_GLOBAL_GTT_IVB);
+}
+
+static inline u32 *
+__gen8_emit_flush_dw(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
+{
+	*cs++ = (MI_FLUSH_DW + 1) | flags;
+	*cs++ = gtt_offset;
+	*cs++ = 0;
+	*cs++ = value;
+
+	return cs;
 }
 
 static inline u32 *
@@ -285,12 +302,10 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
 	/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
 	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
 
-	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | flags;
-	*cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
-	*cs++ = 0;
-	*cs++ = value;
-
-	return cs;
+	return __gen8_emit_flush_dw(cs,
+				    value,
+				    gtt_offset | MI_FLUSH_DW_USE_GTT,
+				    flags | MI_FLUSH_DW_OP_STOREDW);
 }
 
 static inline void __intel_engine_reset(struct intel_engine_cs *engine,
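The refactor above splits the old __gen8_emit_ggtt_write_rcs() into a lower-level __gen8_emit_write_rcs() that emits a plain PIPE_CONTROL qword write, with the GGTT addressing bit and the alignment check pushed out into the gen8/gen12 wrappers, plus a new __gen8_emit_flush_dw() for MI_FLUSH_DW. A compilable sketch of that "generic emitter + thin wrappers" shape, using made-up flag values rather than the real hardware encodings:

```c
#include <stdint.h>
#include <stdio.h>
#include <assert.h>

typedef uint32_t u32;

/* Illustrative opcodes/flags only; the real values live in the i915 headers. */
#define OP_PIPE_CONTROL(len)	(0x7a000000u | ((len) - 2))
#define FLAG_QW_WRITE		(1u << 14)
#define FLAG_GLOBAL_GTT		(1u << 24)

/* Low-level emitter: knows nothing about GGTT vs per-process addressing. */
static u32 *emit_write(u32 *cs, u32 value, u32 offset, u32 flags0, u32 flags1)
{
	*cs++ = OP_PIPE_CONTROL(6) | flags0;
	*cs++ = flags1 | FLAG_QW_WRITE;
	*cs++ = offset;
	*cs++ = 0;
	*cs++ = value;
	*cs++ = 0;	/* unused upper dword of the qword write */
	return cs;
}

/* Wrapper adds the GGTT bit and enforces qword alignment, as the real
 * gen8_emit_ggtt_write_rcs() now does. */
static u32 *emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
	assert((gtt_offset & 7) == 0);
	return emit_write(cs, value, gtt_offset, 0, flags | FLAG_GLOBAL_GTT);
}

int main(void)
{
	u32 ring[6];
	u32 *end = emit_ggtt_write(ring, 0xdeadbeef, 0x1000, 0);

	printf("emitted %ld dwords\n", (long)(end - ring));
	return 0;
}
```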
@@ -3547,6 +3547,19 @@ static const struct intel_context_ops execlists_context_ops = {
 	.destroy = execlists_context_destroy,
 };
 
+static u32 hwsp_offset(const struct i915_request *rq)
+{
+	const struct intel_timeline_cacheline *cl;
+
+	/* Before the request is executed, the timeline/cachline is fixed */
+
+	cl = rcu_dereference_protected(rq->hwsp_cacheline, 1);
+	if (cl)
+		return cl->ggtt_offset;
+
+	return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset;
+}
+
 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
 {
 	u32 *cs;
@@ -3569,7 +3582,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
 	*cs++ = MI_NOOP;
 
 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
-	*cs++ = i915_request_timeline(rq)->hwsp_offset;
+	*cs++ = hwsp_offset(rq);
 	*cs++ = 0;
 	*cs++ = rq->fence.seqno - 1;
 
@@ -4886,11 +4899,9 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
 	return gen8_emit_wa_tail(request, cs);
 }
 
-static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
+static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
 {
-	u32 addr = i915_request_active_timeline(request)->hwsp_offset;
-
-	return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
+	return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
 }
 
 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
@@ -4909,7 +4920,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
 	cs = gen8_emit_ggtt_write_rcs(cs,
 				      request->fence.seqno,
-				      i915_request_active_timeline(request)->hwsp_offset,
+				      hwsp_offset(request),
 				      PIPE_CONTROL_FLUSH_ENABLE |
 				      PIPE_CONTROL_CS_STALL);
 
@@ -4921,7 +4932,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 {
 	cs = gen8_emit_ggtt_write_rcs(cs,
 				      request->fence.seqno,
-				      i915_request_active_timeline(request)->hwsp_offset,
+				      hwsp_offset(request),
 				      PIPE_CONTROL_CS_STALL |
 				      PIPE_CONTROL_TILE_CACHE_FLUSH |
 				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
@@ -4983,7 +4994,9 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
 
 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
 {
-	return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
+	/* XXX Stalling flush before seqno write; post-sync not */
+	cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
+	return gen12_emit_fini_breadcrumb_tail(rq, cs);
 }
 
 static u32 *
@@ -4991,7 +5004,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 {
 	cs = gen12_emit_ggtt_write_rcs(cs,
 				       request->fence.seqno,
-				       i915_request_active_timeline(request)->hwsp_offset,
+				       hwsp_offset(request),
 				       PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
 				       PIPE_CONTROL_CS_STALL |
 				       PIPE_CONTROL_TILE_CACHE_FLUSH |
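The new hwsp_offset() helper above prefers the GGTT offset cached on the timeline cacheline (recorded when the cacheline is acquired, see the timeline hunks below) and falls back to the timeline's hwsp_offset when no cacheline is attached. A simplified stand-alone model of that fallback, with the RCU annotations dropped and reduced struct definitions:

```c
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

typedef uint32_t u32;

/* Simplified stand-ins for intel_timeline_cacheline / intel_timeline. */
struct cacheline { u32 ggtt_offset; };
struct timeline  { u32 hwsp_offset; };
struct request {
	struct cacheline *hwsp_cacheline;	/* may be NULL */
	struct timeline  *timeline;
};

/* Use the offset snapshotted on the cacheline if one is attached,
 * otherwise fall back to the timeline's current HWSP offset. */
static u32 hwsp_offset(const struct request *rq)
{
	if (rq->hwsp_cacheline)
		return rq->hwsp_cacheline->ggtt_offset;

	return rq->timeline->hwsp_offset;
}

int main(void)
{
	struct timeline tl = { .hwsp_offset = 0x2000 };
	struct cacheline cl = { .ggtt_offset = 0x3040 };
	struct request a = { &cl, &tl }, b = { NULL, &tl };

	printf("a: %#x, b: %#x\n", hwsp_offset(&a), hwsp_offset(&b));
	return 0;
}
```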
@@ -188,10 +188,14 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
 	return cl;
 }
 
-static void cacheline_acquire(struct intel_timeline_cacheline *cl)
+static void cacheline_acquire(struct intel_timeline_cacheline *cl,
+			      u32 ggtt_offset)
 {
-	if (cl)
-		i915_active_acquire(&cl->active);
+	if (!cl)
+		return;
+
+	cl->ggtt_offset = ggtt_offset;
+	i915_active_acquire(&cl->active);
 }
 
 static void cacheline_release(struct intel_timeline_cacheline *cl)
@@ -340,7 +344,7 @@ int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
 	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
 		 tl->fence_context, tl->hwsp_offset);
 
-	cacheline_acquire(tl->hwsp_cacheline);
+	cacheline_acquire(tl->hwsp_cacheline, tl->hwsp_offset);
 	if (atomic_fetch_inc(&tl->pin_count)) {
 		cacheline_release(tl->hwsp_cacheline);
 		__i915_vma_unpin(tl->hwsp_ggtt);
@@ -515,7 +519,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
 	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
 		 tl->fence_context, tl->hwsp_offset);
 
-	cacheline_acquire(cl);
+	cacheline_acquire(cl, tl->hwsp_offset);
 	tl->hwsp_cacheline = cl;
 
 	*seqno = timeline_advance(tl);
@@ -573,9 +577,7 @@ int intel_timeline_read_hwsp(struct i915_request *from,
 	if (err)
 		goto out;
 
-	*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
-		ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;
-
+	*hwsp = cl->ggtt_offset;
 out:
 	i915_active_release(&cl->active);
 	return err;
@@ -94,6 +94,8 @@ struct intel_timeline_cacheline {
 	struct intel_timeline_hwsp *hwsp;
 	void *vaddr;
 
+	u32 ggtt_offset;
+
 	struct rcu_head rcu;
 };
@@ -1489,7 +1489,8 @@ static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset,
 	const struct intel_engine_cs *engine =
 		intel_gvt_render_mmio_to_engine(vgpu->gvt, offset);
 
-	if (!intel_gvt_ggtt_validate_range(vgpu, value, I915_GTT_PAGE_SIZE)) {
+	if (value != 0 &&
+	    !intel_gvt_ggtt_validate_range(vgpu, value, I915_GTT_PAGE_SIZE)) {
 		gvt_vgpu_err("write invalid HWSP address, reg:0x%x, value:0x%x\n",
 			     offset, value);
 		return -EINVAL;
@@ -1650,6 +1651,34 @@ static int edp_psr_imr_iir_write(struct intel_vgpu *vgpu,
 	return 0;
 }
 
+/**
+ * FixMe:
+ * If guest fills non-priv batch buffer on ApolloLake/Broxton as Mesa i965 did:
+ * 717e7539124d (i965: Use a WC map and memcpy for the batch instead of pwrite.)
+ * Due to the missing flush of bb filled by VM vCPU, host GPU hangs on executing
+ * these MI_BATCH_BUFFER.
+ * Temporarily workaround this by setting SNOOP bit for PAT3 used by PPGTT
+ * PML4 PTE: PAT(0) PCD(1) PWT(1).
+ * The performance is still expected to be low, will need further improvement.
+ */
+static int bxt_ppat_low_write(struct intel_vgpu *vgpu, unsigned int offset,
+			      void *p_data, unsigned int bytes)
+{
+	u64 pat =
+		GEN8_PPAT(0, CHV_PPAT_SNOOP) |
+		GEN8_PPAT(1, 0) |
+		GEN8_PPAT(2, 0) |
+		GEN8_PPAT(3, CHV_PPAT_SNOOP) |
+		GEN8_PPAT(4, CHV_PPAT_SNOOP) |
+		GEN8_PPAT(5, CHV_PPAT_SNOOP) |
+		GEN8_PPAT(6, CHV_PPAT_SNOOP) |
+		GEN8_PPAT(7, CHV_PPAT_SNOOP);
+
+	vgpu_vreg(vgpu, offset) = lower_32_bits(pat);
+
+	return 0;
+}
+
 static int guc_status_read(struct intel_vgpu *vgpu,
 			   unsigned int offset, void *p_data,
 			   unsigned int bytes)
@@ -2812,7 +2841,7 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt)
 	MMIO_DH(GEN6_PCODE_MAILBOX, D_BDW_PLUS, NULL, mailbox_write);
 
-	MMIO_D(GEN8_PRIVATE_PAT_LO, D_BDW_PLUS);
+	MMIO_D(GEN8_PRIVATE_PAT_LO, D_BDW_PLUS & ~D_BXT);
 	MMIO_D(GEN8_PRIVATE_PAT_HI, D_BDW_PLUS);
 
 	MMIO_D(GAMTARBMODE, D_BDW_PLUS);
@@ -3139,7 +3168,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 		 NULL, NULL);
 
 	MMIO_DFH(GAMT_CHKN_BIT_REG, D_KBL | D_CFL, F_CMD_ACCESS, NULL, NULL);
-	MMIO_D(GEN9_CTX_PREEMPT_REG, D_SKL_PLUS);
+	MMIO_D(GEN9_CTX_PREEMPT_REG, D_SKL_PLUS & ~D_BXT);
 
 	return 0;
 }
@@ -3313,9 +3342,21 @@ static int init_bxt_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(GEN8_PUSHBUS_SHIFT, D_BXT);
 	MMIO_D(GEN6_GFXPAUSE, D_BXT);
 	MMIO_DFH(GEN8_L3SQCREG1, D_BXT, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(GEN8_L3CNTLREG, D_BXT, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x20D8), D_BXT, F_CMD_ACCESS, NULL, NULL);
+	MMIO_F(GEN8_RING_CS_GPR(RENDER_RING_BASE, 0), 0x40, F_CMD_ACCESS,
+	       0, 0, D_BXT, NULL, NULL);
+	MMIO_F(GEN8_RING_CS_GPR(GEN6_BSD_RING_BASE, 0), 0x40, F_CMD_ACCESS,
+	       0, 0, D_BXT, NULL, NULL);
+	MMIO_F(GEN8_RING_CS_GPR(BLT_RING_BASE, 0), 0x40, F_CMD_ACCESS,
+	       0, 0, D_BXT, NULL, NULL);
+	MMIO_F(GEN8_RING_CS_GPR(VEBOX_RING_BASE, 0), 0x40, F_CMD_ACCESS,
+	       0, 0, D_BXT, NULL, NULL);
 
 	MMIO_DFH(GEN9_CTX_PREEMPT_REG, D_BXT, F_CMD_ACCESS, NULL, NULL);
 
+	MMIO_DH(GEN8_PRIVATE_PAT_LO, D_BXT, NULL, bxt_ppat_low_write);
+
 	return 0;
 }
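The bxt_ppat_low_write() handler above forces every PAT entry visible to the guest to a snooped type, so batch buffers written by the guest CPU stay coherent with the host GPU. Assuming the GEN8_PPAT()/CHV_PPAT_SNOOP helpers pack one byte of attributes per PAT index with bit 6 selecting the snooped type (an assumption stated here, not taken from this diff), the low 32 bits written into the emulated register can be reproduced with this small stand-alone program:

```c
#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;
typedef uint32_t u32;

/* Assumed shape of the kernel macros: one byte of PAT attributes per index,
 * with bit 6 selecting the snooped (coherent) memory type on CHV/BXT. */
#define PPAT(i, x)	((u64)(x) << ((i) * 8))
#define PPAT_SNOOP	(1u << 6)

static u32 lower_32_bits(u64 v) { return (u32)v; }

int main(void)
{
	u64 pat =
		PPAT(0, PPAT_SNOOP) |
		PPAT(1, 0) |
		PPAT(2, 0) |
		PPAT(3, PPAT_SNOOP) |
		PPAT(4, PPAT_SNOOP) |
		PPAT(5, PPAT_SNOOP) |
		PPAT(6, PPAT_SNOOP) |
		PPAT(7, PPAT_SNOOP);

	/* Only the _LO register is emulated by the handler, hence the low half. */
	printf("PAT_LO = 0x%08x\n", (unsigned int)lower_32_bits(pat));
	return 0;
}
```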
@@ -1277,7 +1277,7 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
 
 	i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(s->shadow[0]->vm));
 	for_each_engine(engine, vgpu->gvt->gt, id)
-		intel_context_unpin(s->shadow[id]);
+		intel_context_put(s->shadow[id]);
 
 	kmem_cache_destroy(s->workloads);
 }
@@ -1369,11 +1369,6 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 			ce->ring = __intel_context_ring_size(ring_size);
 		}
 
-		ret = intel_context_pin(ce);
-		intel_context_put(ce);
-		if (ret)
-			goto out_shadow_ctx;
-
 		s->shadow[i] = ce;
 	}
 
@@ -1405,7 +1400,6 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 		if (IS_ERR(s->shadow[i]))
 			break;
 
-		intel_context_unpin(s->shadow[i]);
 		intel_context_put(s->shadow[i]);
 	}
 	i915_vm_put(&ppgtt->vm);
@@ -1479,6 +1473,7 @@ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu_submission *s = &workload->vgpu->submission;
 
+	intel_context_unpin(s->shadow[workload->engine->id]);
 	release_shadow_batch_buffer(workload);
 	release_shadow_wa_ctx(&workload->wa_ctx);
 
@@ -1724,6 +1719,12 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu,
 		return ERR_PTR(ret);
 	}
 
+	ret = intel_context_pin(s->shadow[engine->id]);
+	if (ret) {
+		intel_vgpu_destroy_workload(workload);
+		return ERR_PTR(ret);
+	}
+
 	return workload;
 }
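The scheduler changes above move shadow-context pinning out of vGPU setup: setup now only holds a reference, each workload pins the context when it is created and unpins it when it is destroyed, and teardown drops the reference with intel_context_put(). This keeps the contexts unpinned while no workload is in flight (e.g. across suspend/resume). A toy model of that reference-vs-pin lifecycle, using plain counters instead of the real intel_context API:

```c
#include <assert.h>
#include <stdio.h>

/* Toy stand-in for a shadow context: a long-lived reference plus a pin that
 * is only held while a workload actually needs the context resident. */
struct shadow_ctx { int refs; int pins; };

static void ctx_get(struct shadow_ctx *c)   { c->refs++; }
static void ctx_put(struct shadow_ctx *c)   { c->refs--; }
static int  ctx_pin(struct shadow_ctx *c)   { c->pins++; return 0; }
static void ctx_unpin(struct shadow_ctx *c) { assert(c->pins > 0); c->pins--; }

int main(void)
{
	struct shadow_ctx ctx = { 0, 0 };

	ctx_get(&ctx);			/* setup_submission: reference only */

	if (ctx_pin(&ctx) == 0) {	/* create_workload: pin for this job */
		/* ... submit and wait for the workload here ... */
		ctx_unpin(&ctx);	/* destroy_workload */
	}

	ctx_put(&ctx);			/* clean_submission */
	printf("refs=%d pins=%d\n", ctx.refs, ctx.pins);
	return 0;
}
```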
@@ -314,8 +314,10 @@ static void __vma_release(struct dma_fence_work *work)
 {
 	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
 
-	if (vw->pinned)
+	if (vw->pinned) {
 		__i915_gem_object_unpin_pages(vw->pinned);
+		i915_gem_object_put(vw->pinned);
+	}
 
 	i915_vm_free_pt_stash(vw->vm, &vw->stash);
 	i915_vm_put(vw->vm);
@@ -431,7 +433,7 @@ int i915_vma_bind(struct i915_vma *vma,
 
 		if (vma->obj) {
 			__i915_gem_object_pin_pages(vma->obj);
-			work->pinned = vma->obj;
+			work->pinned = i915_gem_object_get(vma->obj);
 		}
 	} else {
 		vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
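The i915_vma fix above makes the asynchronous bind work own a full reference to the object stashed in work->pinned (i915_gem_object_get() when queuing, i915_gem_object_put() in the release callback), so the object cannot be freed while the worker still needs to unpin its pages. A minimal refcounting sketch of that pattern, with invented names rather than the i915 ones:

```c
#include <assert.h>
#include <stdio.h>

struct gem_obj { int refcount; int pages_pinned; };

static struct gem_obj *obj_get(struct gem_obj *o) { o->refcount++; return o; }
static void obj_put(struct gem_obj *o)    { o->refcount--; assert(o->refcount >= 0); }
static void pin_pages(struct gem_obj *o)  { o->pages_pinned++; }
static void unpin_pages(struct gem_obj *o){ o->pages_pinned--; }

struct bind_work { struct gem_obj *pinned; };

/* Queue side: pin the pages and take a reference that the work now owns. */
static void queue_bind(struct bind_work *w, struct gem_obj *obj)
{
	pin_pages(obj);
	w->pinned = obj_get(obj);
}

/* Release callback: drop both the pin and the reference taken above. */
static void work_release(struct bind_work *w)
{
	if (w->pinned) {
		unpin_pages(w->pinned);
		obj_put(w->pinned);
		w->pinned = NULL;
	}
}

int main(void)
{
	struct gem_obj obj = { .refcount = 1, .pages_pinned = 0 };
	struct bind_work w = { NULL };

	queue_bind(&w, &obj);
	/* ... asynchronous binding would run here ... */
	work_release(&w);

	printf("refs=%d pinned=%d\n", obj.refcount, obj.pages_pinned);
	return 0;
}
```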