Commit 2fdb8a8f authored by Christian König's avatar Christian König

drm/scheduler: rework entity flush, kill and fini

This was buggy because when we had to wait for entities which were
killed as well we would just deadlock.

Instead move all the dependency handling into the callbacks so that
will all happen asynchronously.
Signed-off-by: default avatarChristian König <christian.koenig@amd.com>
Reviewed-by: default avatarLuben Tuikov <luben.tuikov@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221014084641.128280-13-christian.koenig@amd.com
parent 2cf9886e
......@@ -140,6 +140,73 @@ bool drm_sched_entity_is_ready(struct drm_sched_entity *entity)
return true;
}
static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk)
{
struct drm_sched_job *job = container_of(wrk, typeof(*job), work);
drm_sched_fence_finished(job->s_fence);
WARN_ON(job->s_fence->parent);
job->sched->ops->free_job(job);
}
/* Signal the scheduler finished fence when the entity in question is killed. */
static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
struct dma_fence_cb *cb)
{
struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
finish_cb);
int r;
dma_fence_put(f);
/* Wait for all dependencies to avoid data corruptions */
while (!xa_empty(&job->dependencies)) {
f = xa_erase(&job->dependencies, job->last_dependency++);
r = dma_fence_add_callback(f, &job->finish_cb,
drm_sched_entity_kill_jobs_cb);
if (!r)
return;
dma_fence_put(f);
}
INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work);
schedule_work(&job->work);
}
/* Remove the entity from the scheduler and kill all pending jobs */
static void drm_sched_entity_kill(struct drm_sched_entity *entity)
{
struct drm_sched_job *job;
struct dma_fence *prev;
if (!entity->rq)
return;
spin_lock(&entity->rq_lock);
entity->stopped = true;
drm_sched_rq_remove_entity(entity->rq, entity);
spin_unlock(&entity->rq_lock);
/* Make sure this entity is not used by the scheduler at the moment */
wait_for_completion(&entity->entity_idle);
prev = dma_fence_get(entity->last_scheduled);
while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
struct drm_sched_fence *s_fence = job->s_fence;
dma_fence_set_error(&s_fence->finished, -ESRCH);
dma_fence_get(&s_fence->finished);
if (!prev || dma_fence_add_callback(prev, &job->finish_cb,
drm_sched_entity_kill_jobs_cb))
drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
prev = &s_fence->finished;
}
dma_fence_put(prev);
}
/**
* drm_sched_entity_flush - Flush a context entity
*
......@@ -180,91 +247,13 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
/* For killed process disable any more IBs enqueue right now */
last_user = cmpxchg(&entity->last_user, current->group_leader, NULL);
if ((!last_user || last_user == current->group_leader) &&
(current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) {
spin_lock(&entity->rq_lock);
entity->stopped = true;
drm_sched_rq_remove_entity(entity->rq, entity);
spin_unlock(&entity->rq_lock);
}
(current->flags & PF_EXITING) && (current->exit_code == SIGKILL))
drm_sched_entity_kill(entity);
return ret;
}
EXPORT_SYMBOL(drm_sched_entity_flush);
static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk)
{
struct drm_sched_job *job = container_of(wrk, typeof(*job), work);
drm_sched_fence_finished(job->s_fence);
WARN_ON(job->s_fence->parent);
job->sched->ops->free_job(job);
}
/* Signal the scheduler finished fence when the entity in question is killed. */
static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
struct dma_fence_cb *cb)
{
struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
finish_cb);
dma_fence_put(f);
INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work);
schedule_work(&job->work);
}
static struct dma_fence *
drm_sched_job_dependency(struct drm_sched_job *job,
struct drm_sched_entity *entity)
{
if (!xa_empty(&job->dependencies))
return xa_erase(&job->dependencies, job->last_dependency++);
if (job->sched->ops->dependency)
return job->sched->ops->dependency(job, entity);
return NULL;
}
static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity)
{
struct drm_sched_job *job;
struct dma_fence *f;
int r;
while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
struct drm_sched_fence *s_fence = job->s_fence;
/* Wait for all dependencies to avoid data corruptions */
while ((f = drm_sched_job_dependency(job, entity))) {
dma_fence_wait(f, false);
dma_fence_put(f);
}
drm_sched_fence_scheduled(s_fence);
dma_fence_set_error(&s_fence->finished, -ESRCH);
/*
* When pipe is hanged by older entity, new entity might
* not even have chance to submit it's first job to HW
* and so entity->last_scheduled will remain NULL
*/
if (!entity->last_scheduled) {
drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
continue;
}
dma_fence_get(entity->last_scheduled);
r = dma_fence_add_callback(entity->last_scheduled,
&job->finish_cb,
drm_sched_entity_kill_jobs_cb);
if (r == -ENOENT)
drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
else if (r)
DRM_ERROR("fence add callback failed (%d)\n", r);
}
}
/**
* drm_sched_entity_fini - Destroy a context entity
*
......@@ -278,35 +267,19 @@ static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity)
*/
void drm_sched_entity_fini(struct drm_sched_entity *entity)
{
struct drm_gpu_scheduler *sched = NULL;
if (entity->rq) {
sched = entity->rq->sched;
drm_sched_rq_remove_entity(entity->rq, entity);
}
/* Consumption of existing IBs wasn't completed. Forcefully
* remove them here.
*/
if (spsc_queue_count(&entity->job_queue)) {
if (sched) {
/*
* Wait for thread to idle to make sure it isn't processing
* this entity.
* If consumption of existing IBs wasn't completed. Forcefully remove
* them here. Also makes sure that the scheduler won't touch this entity
* any more.
*/
wait_for_completion(&entity->entity_idle);
drm_sched_entity_kill(entity);
}
if (entity->dependency) {
dma_fence_remove_callback(entity->dependency,
&entity->cb);
dma_fence_remove_callback(entity->dependency, &entity->cb);
dma_fence_put(entity->dependency);
entity->dependency = NULL;
}
drm_sched_entity_kill_jobs(entity);
}
dma_fence_put(entity->last_scheduled);
entity->last_scheduled = NULL;
}
......@@ -416,6 +389,19 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
return false;
}
static struct dma_fence *
drm_sched_job_dependency(struct drm_sched_job *job,
struct drm_sched_entity *entity)
{
if (!xa_empty(&job->dependencies))
return xa_erase(&job->dependencies, job->last_dependency++);
if (job->sched->ops->dependency)
return job->sched->ops->dependency(job, entity);
return NULL;
}
struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
{
struct drm_sched_job *sched_job;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment