Commit 817c70e2 authored by Umesh Nerlige Ramappa's avatar Umesh Nerlige Ramappa Committed by Rodrigo Vivi

drm/xe: Fix use after free when client stats are captured

xe_file_close triggers an asynchronous queue cleanup and then frees up
the xef object. Since queue cleanup flushes all pending jobs and the KMD
stores client usage stats into the xef object after jobs are flushed, we
see a use-after-free for the xef object. Resolve this by taking a
reference to xef from xe_exec_queue.

While at it, revert an earlier change that contained a partial work
around for this issue.

v2:
- Take a ref to xef even for the VM bind queue (Matt)
- Squash patches relevant to that fix and work around (Lucas)

v3: Fix typo (Lucas)

Fixes: ce62827b ("drm/xe: Do not access xe file when updating exec queue run_ticks")
Fixes: 6109f24f ("drm/xe: Add helper to accumulate exec queue runtime")
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1908Signed-off-by: default avatarUmesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Reviewed-by: default avatarMatthew Brost <matthew.brost@intel.com>
Reviewed-by: default avatarLucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240718210548.3580382-5-umesh.nerlige.ramappa@intel.comSigned-off-by: default avatarLucas De Marchi <lucas.demarchi@intel.com>
(cherry picked from commit 2149ded6)
Signed-off-by: default avatarRodrigo Vivi <rodrigo.vivi@intel.com>
parent 6309f9b1
...@@ -251,11 +251,8 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) ...@@ -251,11 +251,8 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
/* Accumulate all the exec queues from this client */ /* Accumulate all the exec queues from this client */
mutex_lock(&xef->exec_queue.lock); mutex_lock(&xef->exec_queue.lock);
xa_for_each(&xef->exec_queue.xa, i, q) { xa_for_each(&xef->exec_queue.xa, i, q)
xe_exec_queue_update_run_ticks(q); xe_exec_queue_update_run_ticks(q);
xef->run_ticks[q->class] += q->run_ticks - q->old_run_ticks;
q->old_run_ticks = q->run_ticks;
}
mutex_unlock(&xef->exec_queue.lock); mutex_unlock(&xef->exec_queue.lock);
/* Get the total GPU cycles */ /* Get the total GPU cycles */
......
...@@ -37,6 +37,10 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q) ...@@ -37,6 +37,10 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q)
{ {
if (q->vm) if (q->vm)
xe_vm_put(q->vm); xe_vm_put(q->vm);
if (q->xef)
xe_file_put(q->xef);
kfree(q); kfree(q);
} }
...@@ -649,6 +653,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, ...@@ -649,6 +653,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
goto kill_exec_queue; goto kill_exec_queue;
args->exec_queue_id = id; args->exec_queue_id = id;
q->xef = xe_file_get(xef);
return 0; return 0;
...@@ -762,6 +767,7 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q) ...@@ -762,6 +767,7 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
*/ */
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
{ {
struct xe_file *xef;
struct xe_lrc *lrc; struct xe_lrc *lrc;
u32 old_ts, new_ts; u32 old_ts, new_ts;
...@@ -773,6 +779,8 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) ...@@ -773,6 +779,8 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
if (!q->vm || !q->vm->xef) if (!q->vm || !q->vm->xef)
return; return;
xef = q->vm->xef;
/* /*
* Only sample the first LRC. For parallel submission, all of them are * Only sample the first LRC. For parallel submission, all of them are
* scheduled together and we compensate that below by multiplying by * scheduled together and we compensate that below by multiplying by
...@@ -783,7 +791,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) ...@@ -783,7 +791,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
*/ */
lrc = q->lrc[0]; lrc = q->lrc[0];
new_ts = xe_lrc_update_timestamp(lrc, &old_ts); new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
q->run_ticks += (new_ts - old_ts) * q->width; xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
} }
void xe_exec_queue_kill(struct xe_exec_queue *q) void xe_exec_queue_kill(struct xe_exec_queue *q)
......
...@@ -38,6 +38,9 @@ enum xe_exec_queue_priority { ...@@ -38,6 +38,9 @@ enum xe_exec_queue_priority {
* a kernel object. * a kernel object.
*/ */
struct xe_exec_queue { struct xe_exec_queue {
/** @xef: Back pointer to xe file if this is user created exec queue */
struct xe_file *xef;
/** @gt: graphics tile this exec queue can submit to */ /** @gt: graphics tile this exec queue can submit to */
struct xe_gt *gt; struct xe_gt *gt;
/** /**
...@@ -139,10 +142,6 @@ struct xe_exec_queue { ...@@ -139,10 +142,6 @@ struct xe_exec_queue {
* Protected by @vm's resv. Unused if @vm == NULL. * Protected by @vm's resv. Unused if @vm == NULL.
*/ */
u64 tlb_flush_seqno; u64 tlb_flush_seqno;
/** @old_run_ticks: prior hw engine class run time in ticks for this exec queue */
u64 old_run_ticks;
/** @run_ticks: hw engine class run time in ticks for this exec queue */
u64 run_ticks;
/** @lrc: logical ring context for this exec queue */ /** @lrc: logical ring context for this exec queue */
struct xe_lrc *lrc[]; struct xe_lrc *lrc[];
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment