Commit 4142fc67 authored by Linus Torvalds

Merge tag 'drm-fixes-2023-08-04' of git://anongit.freedesktop.org/drm/drm

Pull drm fixes from Dave Airlie:
 "Small set of fixes this week, i915 and a few misc ones. I didn't see
  an amd pull so maybe next week it'll have a few more on that driver.

  ttm:
   - NULL ptr deref fix

  panel:
   - add missing MODULE_DEVICE_TABLE

  imx/ipuv3:
   - timing fix

  i915:
   - Fix bug in getting msg length in AUX CH registers handler
   - Gen12 AUX invalidation fixes
   - Fix premature release of request's reusable memory"

* tag 'drm-fixes-2023-08-04' of git://anongit.freedesktop.org/drm/drm:
  drm/panel: samsung-s6d7aa0: Add MODULE_DEVICE_TABLE
  drm/i915: Fix premature release of request's reusable memory
  drm/i915/gt: Support aux invalidation on all engines
  drm/i915/gt: Poll aux invalidation register bit on invalidation
  drm/i915/gt: Enable the CCS_FLUSH bit in the pipe control and in the CS
  drm/i915/gt: Rename flags with bit_group_X according to the datasheet
  drm/i915/gt: Ensure memory quiesced before invalidation
  drm/i915: Add the gen12_needs_ccs_aux_inv helper
  drm/i915/gt: Cleanup aux invalidation registers
  drm/i915/gvt: Fix bug in getting msg length in AUX CH registers handler
  drm/imx/ipuv3: Fix front porch adjustment upon hactive aligning
  drm/ttm: check null pointer before accessing when swapping
parents 4593f3c2 1958b0f9
@@ -165,14 +165,60 @@ static u32 preparser_disable(bool state)
 	return MI_ARB_CHECK | 1 << 8 | state;
 }
 
-u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg)
+static i915_reg_t gen12_get_aux_inv_reg(struct intel_engine_cs *engine)
 {
-	u32 gsi_offset = gt->uncore->gsi_offset;
+	switch (engine->id) {
+	case RCS0:
+		return GEN12_CCS_AUX_INV;
+	case BCS0:
+		return GEN12_BCS0_AUX_INV;
+	case VCS0:
+		return GEN12_VD0_AUX_INV;
+	case VCS2:
+		return GEN12_VD2_AUX_INV;
+	case VECS0:
+		return GEN12_VE0_AUX_INV;
+	case CCS0:
+		return GEN12_CCS0_AUX_INV;
+	default:
+		return INVALID_MMIO_REG;
+	}
+}
+
+static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine)
+{
+	i915_reg_t reg = gen12_get_aux_inv_reg(engine);
+
+	if (IS_PONTEVECCHIO(engine->i915))
+		return false;
+
+	/*
+	 * So far platforms supported by i915 having flat ccs do not require
+	 * AUX invalidation. Check also whether the engine requires it.
+	 */
+	return i915_mmio_reg_valid(reg) && !HAS_FLAT_CCS(engine->i915);
+}
+
+u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
+{
+	i915_reg_t inv_reg = gen12_get_aux_inv_reg(engine);
+	u32 gsi_offset = engine->gt->uncore->gsi_offset;
+
+	if (!gen12_needs_ccs_aux_inv(engine))
+		return cs;
 
 	*cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
 	*cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
 	*cs++ = AUX_INV;
-	*cs++ = MI_NOOP;
+
+	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
+		MI_SEMAPHORE_REGISTER_POLL |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_EQ_SDD;
+	*cs++ = 0;
+	*cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
+	*cs++ = 0;
+	*cs++ = 0;
 
 	return cs;
 }
@@ -202,8 +248,13 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 {
 	struct intel_engine_cs *engine = rq->engine;
 
-	if (mode & EMIT_FLUSH) {
-		u32 flags = 0;
+	/*
+	 * On Aux CCS platforms the invalidation of the Aux
+	 * table requires quiescing memory traffic beforehand
+	 */
+	if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) {
+		u32 bit_group_0 = 0;
+		u32 bit_group_1 = 0;
 		int err;
 		u32 *cs;
 
@@ -211,32 +262,40 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 		if (err)
 			return err;
 
-		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_FLUSH_L3;
-		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+		bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
+
+		/*
+		 * When required, in MTL and beyond platforms we
+		 * need to set the CCS_FLUSH bit in the pipe control
+		 */
+		if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
+			bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
+
+		bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+		bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
+		bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 		/* Wa_1409600907:tgl,adl-p */
-		flags |= PIPE_CONTROL_DEPTH_STALL;
-		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
-		flags |= PIPE_CONTROL_FLUSH_ENABLE;
+		bit_group_1 |= PIPE_CONTROL_DEPTH_STALL;
+		bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+		bit_group_1 |= PIPE_CONTROL_FLUSH_ENABLE;
 
-		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-		flags |= PIPE_CONTROL_QW_WRITE;
+		bit_group_1 |= PIPE_CONTROL_STORE_DATA_INDEX;
+		bit_group_1 |= PIPE_CONTROL_QW_WRITE;
 
-		flags |= PIPE_CONTROL_CS_STALL;
+		bit_group_1 |= PIPE_CONTROL_CS_STALL;
 
 		if (!HAS_3D_PIPELINE(engine->i915))
-			flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
+			bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
 		else if (engine->class == COMPUTE_CLASS)
-			flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
+			bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
 
 		cs = intel_ring_begin(rq, 6);
 		if (IS_ERR(cs))
 			return PTR_ERR(cs);
 
-		cs = gen12_emit_pipe_control(cs,
-					     PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
-					     flags, LRC_PPHWSP_SCRATCH_ADDR);
+		cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1,
					     LRC_PPHWSP_SCRATCH_ADDR);
 		intel_ring_advance(rq, cs);
 	}
 
@@ -267,10 +326,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 	else if (engine->class == COMPUTE_CLASS)
 		flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
 
-	if (!HAS_FLAT_CCS(rq->engine->i915))
-		count = 8 + 4;
-	else
-		count = 8;
+	count = 8;
+	if (gen12_needs_ccs_aux_inv(rq->engine))
+		count += 8;
 
 	cs = intel_ring_begin(rq, count);
 	if (IS_ERR(cs))
@@ -285,11 +343,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 
 	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
 
-	if (!HAS_FLAT_CCS(rq->engine->i915)) {
-		/* hsdes: 1809175790 */
-		cs = gen12_emit_aux_table_inv(rq->engine->gt,
-					      cs, GEN12_GFX_CCS_AUX_NV);
-	}
+	cs = gen12_emit_aux_table_inv(engine, cs);
 
 	*cs++ = preparser_disable(false);
 	intel_ring_advance(rq, cs);
@@ -300,21 +354,14 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 
 int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
 {
-	intel_engine_mask_t aux_inv = 0;
-	u32 cmd, *cs;
+	u32 cmd = 4;
+	u32 *cs;
 
-	cmd = 4;
 	if (mode & EMIT_INVALIDATE) {
 		cmd += 2;
 
-		if (!HAS_FLAT_CCS(rq->engine->i915) &&
-		    (rq->engine->class == VIDEO_DECODE_CLASS ||
-		     rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
-			aux_inv = rq->engine->mask &
-				~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
-			if (aux_inv)
-				cmd += 4;
-		}
+		if (gen12_needs_ccs_aux_inv(rq->engine))
+			cmd += 8;
 	}
 
 	cs = intel_ring_begin(rq, cmd);
@@ -338,6 +385,10 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
 		cmd |= MI_INVALIDATE_TLB;
 		if (rq->engine->class == VIDEO_DECODE_CLASS)
 			cmd |= MI_INVALIDATE_BSD;
+
+		if (gen12_needs_ccs_aux_inv(rq->engine) &&
+		    rq->engine->class == COPY_ENGINE_CLASS)
+			cmd |= MI_FLUSH_DW_CCS;
 	}
 
 	*cs++ = cmd;
@@ -345,14 +396,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
 	*cs++ = 0; /* upper addr */
 	*cs++ = 0; /* value */
 
-	if (aux_inv) { /* hsdes: 1809175790 */
-		if (rq->engine->class == VIDEO_DECODE_CLASS)
-			cs = gen12_emit_aux_table_inv(rq->engine->gt,
-						      cs, GEN12_VD0_AUX_NV);
-		else
-			cs = gen12_emit_aux_table_inv(rq->engine->gt,
-						      cs, GEN12_VE0_AUX_NV);
-	}
+	cs = gen12_emit_aux_table_inv(rq->engine, cs);
 
 	if (mode & EMIT_INVALIDATE)
 		*cs++ = preparser_disable(false);
......
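A note on the command-stream change above: the rewritten gen12_emit_aux_table_inv() no longer only writes the AUX_INV bit, it also emits an MI_SEMAPHORE_WAIT with MI_SEMAPHORE_REGISTER_POLL and MI_SEMAPHORE_SAD_EQ_SDD against the same register, so the engine stalls until the invalidation register reads back as zero. As a rough analogy only, here is a plain user-space C sketch of that trigger-then-poll discipline; the fake register, the three-poll latency and the helper names are invented for illustration and are not i915 code.

/* aux_inv_poll_demo.c -- illustrative only; names and timing are made up. */
#include <stdio.h>
#include <stdint.h>

#define AUX_INV (1u << 0)

/* Pretend MMIO register plus a fake completion latency. */
static uint32_t fake_aux_inv_reg;
static int fake_cycles_left;

static void reg_write(uint32_t val)
{
	fake_aux_inv_reg = val;
	fake_cycles_left = 3;		/* "hardware" needs three reads to finish */
}

static uint32_t reg_read(void)
{
	if ((fake_aux_inv_reg & AUX_INV) && --fake_cycles_left == 0)
		fake_aux_inv_reg &= ~AUX_INV;	/* hardware acknowledges by clearing the bit */
	return fake_aux_inv_reg;
}

int main(void)
{
	int polls = 0;

	reg_write(AUX_INV);		/* analogous to the LRI that sets AUX_INV */
	while (reg_read() & AUX_INV)	/* analogous to the MI_SEMAPHORE_WAIT polling for 0 */
		polls++;

	printf("aux invalidation acknowledged after %d polls\n", polls);
	return 0;
}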
@@ -13,6 +13,7 @@
 #include "intel_gt_regs.h"
 #include "intel_gpu_commands.h"
 
+struct intel_engine_cs;
 struct intel_gt;
 struct i915_request;
 
@@ -46,28 +47,32 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
 u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
 u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
 
-u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg);
+u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs);
 
 static inline u32 *
-__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
+__gen8_emit_pipe_control(u32 *batch, u32 bit_group_0,
+			 u32 bit_group_1, u32 offset)
 {
 	memset(batch, 0, 6 * sizeof(u32));
 
-	batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0;
-	batch[1] = flags1;
+	batch[0] = GFX_OP_PIPE_CONTROL(6) | bit_group_0;
+	batch[1] = bit_group_1;
 	batch[2] = offset;
 
 	return batch + 6;
 }
 
-static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
+static inline u32 *gen8_emit_pipe_control(u32 *batch,
+					  u32 bit_group_1, u32 offset)
 {
-	return __gen8_emit_pipe_control(batch, 0, flags, offset);
+	return __gen8_emit_pipe_control(batch, 0, bit_group_1, offset);
 }
 
-static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
+static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 bit_group_0,
+					   u32 bit_group_1, u32 offset)
 {
-	return __gen8_emit_pipe_control(batch, flags0, flags1, offset);
+	return __gen8_emit_pipe_control(batch, bit_group_0,
+					bit_group_1, offset);
 }
 
 static inline u32 *
......
@@ -121,6 +121,7 @@
 #define MI_SEMAPHORE_TARGET(engine) ((engine)<<15)
 #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */
 #define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */
+#define MI_SEMAPHORE_REGISTER_POLL (1 << 16)
 #define MI_SEMAPHORE_POLL (1 << 15)
 #define MI_SEMAPHORE_SAD_GT_SDD (0 << 12)
 #define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12)
@@ -299,6 +300,7 @@
 #define PIPE_CONTROL_QW_WRITE (1<<14)
 #define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14)
 #define PIPE_CONTROL_DEPTH_STALL (1<<13)
+#define PIPE_CONTROL_CCS_FLUSH (1<<13) /* MTL+ */
 #define PIPE_CONTROL_WRITE_FLUSH (1<<12)
 #define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */
 #define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */
......
@@ -332,9 +332,11 @@
 #define GEN8_PRIVATE_PAT_HI _MMIO(0x40e0 + 4)
 #define GEN10_PAT_INDEX(index) _MMIO(0x40e0 + (index) * 4)
 #define BSD_HWS_PGA_GEN7 _MMIO(0x4180)
-#define GEN12_GFX_CCS_AUX_NV _MMIO(0x4208)
-#define GEN12_VD0_AUX_NV _MMIO(0x4218)
-#define GEN12_VD1_AUX_NV _MMIO(0x4228)
+
+#define GEN12_CCS_AUX_INV _MMIO(0x4208)
+#define GEN12_VD0_AUX_INV _MMIO(0x4218)
+#define GEN12_VE0_AUX_INV _MMIO(0x4238)
+#define GEN12_BCS0_AUX_INV _MMIO(0x4248)
 
 #define GEN8_RTCR _MMIO(0x4260)
 #define GEN8_M1TCR _MMIO(0x4264)
@@ -342,14 +344,12 @@
 #define GEN8_BTCR _MMIO(0x426c)
 #define GEN8_VTCR _MMIO(0x4270)
 
-#define GEN12_VD2_AUX_NV _MMIO(0x4298)
-#define GEN12_VD3_AUX_NV _MMIO(0x42a8)
-#define GEN12_VE0_AUX_NV _MMIO(0x4238)
 #define BLT_HWS_PGA_GEN7 _MMIO(0x4280)
-#define GEN12_VE1_AUX_NV _MMIO(0x42b8)
+#define GEN12_VD2_AUX_INV _MMIO(0x4298)
+#define GEN12_CCS0_AUX_INV _MMIO(0x42c8)
 
 #define AUX_INV REG_BIT(0)
 
 #define VEBOX_HWS_PGA_GEN7 _MMIO(0x4380)
 
 #define GEN12_AUX_ERR_DBG _MMIO(0x43f4)
......
...@@ -1364,10 +1364,7 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) ...@@ -1364,10 +1364,7 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
IS_DG2_G11(ce->engine->i915)) IS_DG2_G11(ce->engine->i915))
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
/* hsdes: 1809175790 */ cs = gen12_emit_aux_table_inv(ce->engine, cs);
if (!HAS_FLAT_CCS(ce->engine->i915))
cs = gen12_emit_aux_table_inv(ce->engine->gt,
cs, GEN12_GFX_CCS_AUX_NV);
/* Wa_16014892111 */ /* Wa_16014892111 */
if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) || if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) ||
...@@ -1392,17 +1389,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) ...@@ -1392,17 +1389,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
0); 0);
/* hsdes: 1809175790 */ return gen12_emit_aux_table_inv(ce->engine, cs);
if (!HAS_FLAT_CCS(ce->engine->i915)) {
if (ce->engine->class == VIDEO_DECODE_CLASS)
cs = gen12_emit_aux_table_inv(ce->engine->gt,
cs, GEN12_VD0_AUX_NV);
else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
cs = gen12_emit_aux_table_inv(ce->engine->gt,
cs, GEN12_VE0_AUX_NV);
}
return cs;
} }
static void static void
......
@@ -491,7 +491,7 @@ void intel_gvt_i2c_handle_aux_ch_write(struct intel_vgpu *vgpu,
 		return;
 	}
 
-	msg_length = REG_FIELD_GET(DP_AUX_CH_CTL_MESSAGE_SIZE_MASK, reg);
+	msg_length = REG_FIELD_GET(DP_AUX_CH_CTL_MESSAGE_SIZE_MASK, value);
 
 	// check the msg in DATA register.
 	msg = vgpu_vreg(vgpu, offset + 4);
......
@@ -449,8 +449,11 @@ int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
 		}
 	} while (unlikely(is_barrier(active)));
 
-	if (!__i915_active_fence_set(active, fence))
+	fence = __i915_active_fence_set(active, fence);
+	if (!fence)
 		__i915_active_acquire(ref);
+	else
+		dma_fence_put(fence);
 
 out:
 	i915_active_release(ref);
@@ -469,13 +472,9 @@ __i915_active_set_fence(struct i915_active *ref,
 		return NULL;
 	}
 
-	rcu_read_lock();
 	prev = __i915_active_fence_set(active, fence);
-	if (prev)
-		prev = dma_fence_get_rcu(prev);
-	else
+	if (!prev)
 		__i915_active_acquire(ref);
-	rcu_read_unlock();
 
 	return prev;
 }
@@ -1019,10 +1018,11 @@ void i915_request_add_active_barriers(struct i915_request *rq)
  *
  * Records the new @fence as the last active fence along its timeline in
  * this active tracker, moving the tracking callbacks from the previous
- * fence onto this one. Returns the previous fence (if not already completed),
- * which the caller must ensure is executed before the new fence. To ensure
- * that the order of fences within the timeline of the i915_active_fence is
- * understood, it should be locked by the caller.
+ * fence onto this one. Gets and returns a reference to the previous fence
+ * (if not already completed), which the caller must put after making sure
+ * that it is executed before the new fence. To ensure that the order of
+ * fences within the timeline of the i915_active_fence is understood, it
+ * should be locked by the caller.
  */
 struct dma_fence *
 __i915_active_fence_set(struct i915_active_fence *active,
@@ -1031,7 +1031,23 @@ __i915_active_fence_set(struct i915_active_fence *active,
 	struct dma_fence *prev;
 	unsigned long flags;
 
-	if (fence == rcu_access_pointer(active->fence))
+	/*
+	 * In case of fences embedded in i915_requests, their memory is
+	 * SLAB_FAILSAFE_BY_RCU, then it can be reused right after release
+	 * by new requests. Then, there is a risk of passing back a pointer
+	 * to a new, completely unrelated fence that reuses the same memory
+	 * while tracked under a different active tracker. Combined with i915
+	 * perf open/close operations that build await dependencies between
+	 * engine kernel context requests and user requests from different
+	 * timelines, this can lead to dependency loops and infinite waits.
+	 *
+	 * As a countermeasure, we try to get a reference to the active->fence
+	 * first, so if we succeed and pass it back to our user then it is not
+	 * released and potentially reused by an unrelated request before the
+	 * user has a chance to set up an await dependency on it.
+	 */
+	prev = i915_active_fence_get(active);
+	if (fence == prev)
 		return fence;
 
 	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
@@ -1040,27 +1056,56 @@ __i915_active_fence_set(struct i915_active_fence *active,
 	 * Consider that we have two threads arriving (A and B), with
 	 * C already resident as the active->fence.
 	 *
-	 * A does the xchg first, and so it sees C or NULL depending
-	 * on the timing of the interrupt handler. If it is NULL, the
-	 * previous fence must have been signaled and we know that
-	 * we are first on the timeline. If it is still present,
-	 * we acquire the lock on that fence and serialise with the interrupt
-	 * handler, in the process removing it from any future interrupt
-	 * callback. A will then wait on C before executing (if present).
-	 *
-	 * As B is second, it sees A as the previous fence and so waits for
-	 * it to complete its transition and takes over the occupancy for
-	 * itself -- remembering that it needs to wait on A before executing.
+	 * Both A and B have got a reference to C or NULL, depending on the
+	 * timing of the interrupt handler. Let's assume that if A has got C
+	 * then it has locked C first (before B).
 	 *
 	 * Note the strong ordering of the timeline also provides consistent
 	 * nesting rules for the fence->lock; the inner lock is always the
 	 * older lock.
 	 */
 	spin_lock_irqsave(fence->lock, flags);
-	prev = xchg(__active_fence_slot(active), fence);
-	if (prev) {
-		GEM_BUG_ON(prev == fence);
+	if (prev)
 		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+
+	/*
+	 * A does the cmpxchg first, and so it sees C or NULL, as before, or
+	 * something else, depending on the timing of other threads and/or
+	 * interrupt handler. If not the same as before then A unlocks C if
+	 * applicable and retries, starting from an attempt to get a new
+	 * active->fence. Meanwhile, B follows the same path as A.
+	 * Once A succeeds with cmpxch, B fails again, retires, gets A from
+	 * active->fence, locks it as soon as A completes, and possibly
+	 * succeeds with cmpxchg.
+	 */
+	while (cmpxchg(__active_fence_slot(active), prev, fence) != prev) {
+		if (prev) {
+			spin_unlock(prev->lock);
+			dma_fence_put(prev);
+		}
+		spin_unlock_irqrestore(fence->lock, flags);
+
+		prev = i915_active_fence_get(active);
+		GEM_BUG_ON(prev == fence);
+
+		spin_lock_irqsave(fence->lock, flags);
+		if (prev)
+			spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+	}
+
+	/*
+	 * If prev is NULL then the previous fence must have been signaled
+	 * and we know that we are first on the timeline. If it is still
+	 * present then, having the lock on that fence already acquired, we
+	 * serialise with the interrupt handler, in the process of removing it
+	 * from any future interrupt callback. A will then wait on C before
+	 * executing (if present).
+	 *
+	 * As B is second, it sees A as the previous fence and so waits for
+	 * it to complete its transition and takes over the occupancy for
+	 * itself -- remembering that it needs to wait on A before executing.
+	 */
+	if (prev) {
 		__list_del_entry(&active->cb.node);
 		spin_unlock(prev->lock); /* serialise with prev->cb_list */
 	}
@@ -1077,11 +1122,7 @@ int i915_active_fence_set(struct i915_active_fence *active,
 	int err = 0;
 
 	/* Must maintain timeline ordering wrt previous active requests */
-	rcu_read_lock();
 	fence = __i915_active_fence_set(active, &rq->fence);
-	if (fence) /* but the previous fence may not belong to that timeline! */
-		fence = dma_fence_get_rcu(fence);
-	rcu_read_unlock();
 	if (fence) {
 		err = i915_request_await_dma_fence(rq, fence);
 		dma_fence_put(fence);
......
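The i915_active.c changes above all implement one contract: take a reference to the fence currently tracked in the active slot before publishing its replacement with a compare-and-swap, retry if the slot moved underneath, and return the referenced previous fence so the caller can order against it and then drop the reference (the obligation the i915_request.c hunks below document). A stripped-down user-space sketch of that pattern follows; it uses C11 atomics and a made-up refcounted toy_fence type instead of dma_fence/i915_active_fence, and leaves out the fence->lock and callback-list handling the real function also performs.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_fence {
	atomic_int refcount;
	int seqno;
};

static struct toy_fence *fence_get(struct toy_fence *f)
{
	if (f)
		atomic_fetch_add(&f->refcount, 1);
	return f;
}

static void fence_put(struct toy_fence *f)
{
	if (f && atomic_fetch_sub(&f->refcount, 1) == 1)
		free(f);
}

/* The "active" slot tracking the last fence on a timeline (a bare pointer). */
static _Atomic(struct toy_fence *) active_slot;

/*
 * Publish @fence and return the *referenced* previous fence (or NULL).
 * Taking the reference before the compare-and-swap is the point of the
 * fix: the old pointer cannot be freed and recycled as an unrelated
 * fence while the caller still needs it for ordering.
 */
static struct toy_fence *active_set(struct toy_fence *fence)
{
	struct toy_fence *prev, *expected;

	for (;;) {
		prev = fence_get(atomic_load(&active_slot));
		expected = prev;
		if (atomic_compare_exchange_strong(&active_slot, &expected, fence))
			return prev;	/* caller must fence_put() this when done */
		/* The slot changed under us: drop the stale reference and retry. */
		fence_put(prev);
	}
}

int main(void)
{
	struct toy_fence *a = calloc(1, sizeof(*a));
	struct toy_fence *b = calloc(1, sizeof(*b));
	struct toy_fence *prev;

	if (!a || !b)
		return 1;
	atomic_init(&a->refcount, 1); a->seqno = 1;
	atomic_init(&b->refcount, 1); b->seqno = 2;

	prev = active_set(a);	/* slot was empty: prev == NULL */
	fence_put(prev);

	prev = active_set(b);	/* prev == a, with a reference held for us */
	if (prev)
		printf("order after fence %d before running fence %d\n",
		       prev->seqno, b->seqno);
	fence_put(prev);	/* analogous to the new i915_request_put(prev) */

	return 0;		/* teardown of a and b elided in this sketch */
}

In the toy model, returning prev without the extra reference would be the pre-fix behaviour: with the slab reuse described in the comment above, the memory behind prev could already belong to a completely different fence by the time the caller waits on it.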
@@ -1661,6 +1661,11 @@ __i915_request_ensure_parallel_ordering(struct i915_request *rq,
 
 	request_to_parent(rq)->parallel.last_rq = i915_request_get(rq);
 
+	/*
+	 * Users have to put a reference potentially got by
+	 * __i915_active_fence_set() to the returned request
+	 * when no longer needed
+	 */
 	return to_request(__i915_active_fence_set(&timeline->last_request,
 						  &rq->fence));
 }
@@ -1707,6 +1712,10 @@ __i915_request_ensure_ordering(struct i915_request *rq,
 							 0);
 	}
 
+	/*
+	 * Users have to put the reference to prev potentially got
+	 * by __i915_active_fence_set() when no longer needed
+	 */
 	return prev;
 }
 
@@ -1760,6 +1769,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 		prev = __i915_request_ensure_ordering(rq, timeline);
 	else
 		prev = __i915_request_ensure_parallel_ordering(rq, timeline);
+	if (prev)
+		i915_request_put(prev);
 
 	/*
 	 * Make sure that no request gazumped us - if it was allocated after
......
@@ -310,7 +310,7 @@ static void ipu_crtc_mode_set_nofb(struct drm_crtc *crtc)
 		dev_warn(ipu_crtc->dev, "8-pixel align hactive %d -> %d\n",
 			 sig_cfg.mode.hactive, new_hactive);
 
-		sig_cfg.mode.hfront_porch = new_hactive - sig_cfg.mode.hactive;
+		sig_cfg.mode.hfront_porch -= new_hactive - sig_cfg.mode.hactive;
 		sig_cfg.mode.hactive = new_hactive;
 	}
 
......
@@ -569,6 +569,7 @@ static const struct of_device_id s6d7aa0_of_match[] = {
 	},
 	{ /* sentinel */ }
 };
+MODULE_DEVICE_TABLE(of, s6d7aa0_of_match);
 
 static struct mipi_dsi_driver s6d7aa0_driver = {
 	.probe = s6d7aa0_probe,
......
@@ -519,6 +519,7 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
 
 	if (bo->pin_count) {
 		*locked = false;
-		*busy = false;
+		if (busy)
+			*busy = false;
 		return false;
 	}
......