Commit 03a86c24 authored by Matthew Auld, committed by Lucas De Marchi

drm/xe: fix unbalanced rpm put() with fence_fini()

Currently we can call fence_fini() twice if something goes wrong when
sending the GuC CT for the tlb request, since we signal the fence and
return an error, leading to the caller also calling fini() on the error
path in the case of stack version of the flow, which leads to an extra
rpm put() which might later cause device to enter suspend when it
shouldn't. It looks like we can just drop the fini() call since the
fence signaller side will already call this for us.

There are known mysterious splats with device going to sleep even with
an rpm ref, and this could be one candidate.

v2 (Matt B):
  - Prefer warning if we detect double fini()

Fixes: f0027022 ("drm/xe: Hold a PM ref when GT TLB invalidations are inflight")
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241009084808.204432-3-matthew.auld@intel.com
(cherry picked from commit cfcbc0520d5055825f0647ab922b655688605183)
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
parent 4ceead37
...@@ -37,6 +37,15 @@ static long tlb_timeout_jiffies(struct xe_gt *gt) ...@@ -37,6 +37,15 @@ static long tlb_timeout_jiffies(struct xe_gt *gt)
return hw_tlb_timeout + 2 * delay; return hw_tlb_timeout + 2 * delay;
} }
/*
 * Finalize a TLB invalidation fence by releasing the runtime PM reference
 * associated with it. Must run only once per fence: fence->gt is cleared on
 * the first call, and a later call with a NULL gt warns and does nothing.
 */
static void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence)
{
	struct xe_gt *gt = fence->gt;

	/* gt is reset below, so NULL here indicates a repeated fini(). */
	if (WARN_ON_ONCE(!gt))
		return;

	xe_pm_runtime_put(gt_to_xe(gt));

	/* Mark the fence as finalized so a second fini() is detected. */
	fence->gt = NULL;
}
static void static void
__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) __invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
{ {
...@@ -204,7 +213,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, ...@@ -204,7 +213,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
tlb_timeout_jiffies(gt)); tlb_timeout_jiffies(gt));
} }
spin_unlock_irq(&gt->tlb_invalidation.pending_lock); spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
} else if (ret < 0) { } else {
__invalidation_fence_signal(xe, fence); __invalidation_fence_signal(xe, fence);
} }
if (!ret) { if (!ret) {
...@@ -267,10 +276,8 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) ...@@ -267,10 +276,8 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
xe_gt_tlb_invalidation_fence_init(gt, &fence, true); xe_gt_tlb_invalidation_fence_init(gt, &fence, true);
ret = xe_gt_tlb_invalidation_guc(gt, &fence); ret = xe_gt_tlb_invalidation_guc(gt, &fence);
if (ret < 0) { if (ret)
xe_gt_tlb_invalidation_fence_fini(&fence);
return ret; return ret;
}
xe_gt_tlb_invalidation_fence_wait(&fence); xe_gt_tlb_invalidation_fence_wait(&fence);
} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
...@@ -496,7 +503,8 @@ static const struct dma_fence_ops invalidation_fence_ops = { ...@@ -496,7 +503,8 @@ static const struct dma_fence_ops invalidation_fence_ops = {
* @stack: fence is stack variable * @stack: fence is stack variable
* *
* Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini
* must be called if fence is not signaled. * will be automatically called when fence is signalled (all fences must signal),
* even on error.
*/ */
void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence, struct xe_gt_tlb_invalidation_fence *fence,
...@@ -516,14 +524,3 @@ void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, ...@@ -516,14 +524,3 @@ void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
dma_fence_get(&fence->base); dma_fence_get(&fence->base);
fence->gt = gt; fence->gt = gt;
} }
/**
* xe_gt_tlb_invalidation_fence_fini - Finalize TLB invalidation fence
* @fence: TLB invalidation fence to finalize
*
* Drop PM ref which fence took during init.
*/
void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence)
{
xe_pm_runtime_put(gt_to_xe(fence->gt));
}
...@@ -28,7 +28,6 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); ...@@ -28,7 +28,6 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence, struct xe_gt_tlb_invalidation_fence *fence,
bool stack); bool stack);
void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence);
static inline void static inline void
xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)
......
...@@ -3199,10 +3199,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) ...@@ -3199,10 +3199,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
&fence[fence_id], vma); &fence[fence_id], vma);
if (ret < 0) { if (ret)
xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
goto wait; goto wait;
}
++fence_id; ++fence_id;
if (!tile->media_gt) if (!tile->media_gt)
...@@ -3214,10 +3212,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) ...@@ -3214,10 +3212,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
ret = xe_gt_tlb_invalidation_vma(tile->media_gt, ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
&fence[fence_id], vma); &fence[fence_id], vma);
if (ret < 0) { if (ret)
xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
goto wait; goto wait;
}
++fence_id; ++fence_id;
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment