Commit f556cb0c authored by Chunming Zhou, committed by Alex Deucher

drm/amd: add scheduler fence implementation (v2)

The scheduler fence is based on the kernel fence framework.

v2: squash in Christian's build fix
Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
parent 4af9f07c
...@@ -86,6 +86,7 @@ amdgpu-y += amdgpu_cgs.o ...@@ -86,6 +86,7 @@ amdgpu-y += amdgpu_cgs.o
# GPU scheduler # GPU scheduler
amdgpu-y += \ amdgpu-y += \
../scheduler/gpu_scheduler.o \ ../scheduler/gpu_scheduler.o \
../scheduler/sched_fence.o \
amdgpu_sched.o amdgpu_sched.o
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
......
...@@ -1261,6 +1261,7 @@ struct amdgpu_cs_parser { ...@@ -1261,6 +1261,7 @@ struct amdgpu_cs_parser {
int (*prepare_job)(struct amdgpu_cs_parser *sched_job); int (*prepare_job)(struct amdgpu_cs_parser *sched_job);
int (*run_job)(struct amdgpu_cs_parser *sched_job); int (*run_job)(struct amdgpu_cs_parser *sched_job);
int (*free_job)(struct amdgpu_cs_parser *sched_job); int (*free_job)(struct amdgpu_cs_parser *sched_job);
struct amd_sched_fence *s_fence;
}; };
static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx) static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx)
......
...@@ -899,8 +899,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ...@@ -899,8 +899,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (amdgpu_enable_scheduler && parser->num_ibs) { if (amdgpu_enable_scheduler && parser->num_ibs) {
struct amdgpu_ring * ring = struct amdgpu_ring * ring =
amdgpu_cs_parser_get_ring(adev, parser); amdgpu_cs_parser_get_ring(adev, parser);
parser->ibs[parser->num_ibs - 1].sequence = atomic64_inc_return(
&parser->ctx->rings[ring->idx].entity.last_queued_v_seq);
if (ring->is_pte_ring || (parser->bo_list && parser->bo_list->has_userptr)) { if (ring->is_pte_ring || (parser->bo_list && parser->bo_list->has_userptr)) {
r = amdgpu_cs_parser_prepare_job(parser); r = amdgpu_cs_parser_prepare_job(parser);
if (r) if (r)
...@@ -910,10 +908,21 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ...@@ -910,10 +908,21 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
parser->ring = ring; parser->ring = ring;
parser->run_job = amdgpu_cs_parser_run_job; parser->run_job = amdgpu_cs_parser_run_job;
parser->free_job = amdgpu_cs_parser_free_job; parser->free_job = amdgpu_cs_parser_free_job;
amd_sched_push_job(ring->scheduler, mutex_lock(&parser->job_lock);
&parser->ctx->rings[ring->idx].entity, r = amd_sched_push_job(ring->scheduler,
parser); &parser->ctx->rings[ring->idx].entity,
cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; parser,
&parser->s_fence);
if (r) {
mutex_unlock(&parser->job_lock);
goto out;
}
parser->ibs[parser->num_ibs - 1].sequence =
amdgpu_ctx_add_fence(parser->ctx, ring,
&parser->s_fence->base,
parser->s_fence->v_seq);
cs->out.handle = parser->s_fence->v_seq;
mutex_unlock(&parser->job_lock);
up_read(&adev->exclusive_lock); up_read(&adev->exclusive_lock);
return 0; return 0;
} }
......
...@@ -268,16 +268,6 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, ...@@ -268,16 +268,6 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
struct fence *fence; struct fence *fence;
uint64_t queued_seq; uint64_t queued_seq;
int r;
if (amdgpu_enable_scheduler) {
r = amd_sched_wait_emit(&cring->entity,
seq,
false,
-1);
if (r)
return NULL;
}
spin_lock(&ctx->ring_lock); spin_lock(&ctx->ring_lock);
if (amdgpu_enable_scheduler) if (amdgpu_enable_scheduler)
......
...@@ -218,7 +218,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, ...@@ -218,7 +218,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
sequence = amdgpu_enable_scheduler ? ib->sequence : 0; sequence = amdgpu_enable_scheduler ? ib->sequence : 0;
if (ib->ctx) if (!amdgpu_enable_scheduler && ib->ctx)
ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
&ib->fence->base, &ib->fence->base,
sequence); sequence);
......
...@@ -118,7 +118,6 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, ...@@ -118,7 +118,6 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
{ {
int r = 0; int r = 0;
if (amdgpu_enable_scheduler) { if (amdgpu_enable_scheduler) {
uint64_t v_seq;
struct amdgpu_cs_parser *sched_job = struct amdgpu_cs_parser *sched_job =
amdgpu_cs_parser_create(adev, owner, &adev->kernel_ctx, amdgpu_cs_parser_create(adev, owner, &adev->kernel_ctx,
ibs, num_ibs); ibs, num_ibs);
...@@ -126,22 +125,23 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, ...@@ -126,22 +125,23 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
return -ENOMEM; return -ENOMEM;
} }
sched_job->free_job = free_job; sched_job->free_job = free_job;
v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].entity.last_queued_v_seq); mutex_lock(&sched_job->job_lock);
ibs[num_ibs - 1].sequence = v_seq; r = amd_sched_push_job(ring->scheduler,
amd_sched_push_job(ring->scheduler, &adev->kernel_ctx.rings[ring->idx].entity,
&adev->kernel_ctx.rings[ring->idx].entity, sched_job, &sched_job->s_fence);
sched_job); if (r) {
r = amd_sched_wait_emit( mutex_unlock(&sched_job->job_lock);
&adev->kernel_ctx.rings[ring->idx].entity, kfree(sched_job);
v_seq, return r;
false, }
-1); ibs[num_ibs - 1].sequence = sched_job->s_fence->v_seq;
if (r) *f = &sched_job->s_fence->base;
WARN(true, "emit timeout\n"); mutex_unlock(&sched_job->job_lock);
} else } else {
r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
if (r) if (r)
return r; return r;
*f = &ibs[num_ibs - 1].fence->base; *f = &ibs[num_ibs - 1].fence->base;
}
return 0; return 0;
} }
...@@ -180,6 +180,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, ...@@ -180,6 +180,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
uint32_t jobs) uint32_t jobs)
{ {
uint64_t seq_ring = 0; uint64_t seq_ring = 0;
char name[20];
if (!(sched && entity && rq)) if (!(sched && entity && rq))
return -EINVAL; return -EINVAL;
...@@ -191,6 +192,10 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, ...@@ -191,6 +192,10 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
entity->scheduler = sched; entity->scheduler = sched;
init_waitqueue_head(&entity->wait_queue); init_waitqueue_head(&entity->wait_queue);
init_waitqueue_head(&entity->wait_emit); init_waitqueue_head(&entity->wait_emit);
entity->fence_context = fence_context_alloc(1);
snprintf(name, sizeof(name), "c_entity[%llu]", entity->fence_context);
memcpy(entity->name, name, 20);
INIT_LIST_HEAD(&entity->fence_list);
if(kfifo_alloc(&entity->job_queue, if(kfifo_alloc(&entity->job_queue,
jobs * sizeof(void *), jobs * sizeof(void *),
GFP_KERNEL)) GFP_KERNEL))
...@@ -199,6 +204,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, ...@@ -199,6 +204,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
spin_lock_init(&entity->queue_lock); spin_lock_init(&entity->queue_lock);
atomic64_set(&entity->last_emitted_v_seq, seq_ring); atomic64_set(&entity->last_emitted_v_seq, seq_ring);
atomic64_set(&entity->last_queued_v_seq, seq_ring); atomic64_set(&entity->last_queued_v_seq, seq_ring);
atomic64_set(&entity->last_signaled_v_seq, seq_ring);
/* Add the entity to the run queue */ /* Add the entity to the run queue */
mutex_lock(&rq->lock); mutex_lock(&rq->lock);
...@@ -291,15 +297,25 @@ int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, ...@@ -291,15 +297,25 @@ int amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
*/ */
int amd_sched_push_job(struct amd_gpu_scheduler *sched, int amd_sched_push_job(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *c_entity, struct amd_sched_entity *c_entity,
void *data) void *data,
struct amd_sched_fence **fence)
{ {
struct amd_sched_job *job = kzalloc(sizeof(struct amd_sched_job), struct amd_sched_job *job;
GFP_KERNEL);
if (!fence)
return -EINVAL;
job = kzalloc(sizeof(struct amd_sched_job), GFP_KERNEL);
if (!job) if (!job)
return -ENOMEM; return -ENOMEM;
job->sched = sched; job->sched = sched;
job->s_entity = c_entity; job->s_entity = c_entity;
job->data = data; job->data = data;
*fence = amd_sched_fence_create(c_entity);
if ((*fence) == NULL) {
kfree(job);
return -EINVAL;
}
job->s_fence = *fence;
while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *), while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *),
&c_entity->queue_lock) != sizeof(void *)) { &c_entity->queue_lock) != sizeof(void *)) {
/** /**
...@@ -368,12 +384,16 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) ...@@ -368,12 +384,16 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
unsigned long flags; unsigned long flags;
sched = sched_job->sched; sched = sched_job->sched;
atomic64_set(&sched_job->s_entity->last_signaled_v_seq,
sched_job->s_fence->v_seq);
amd_sched_fence_signal(sched_job->s_fence);
spin_lock_irqsave(&sched->queue_lock, flags); spin_lock_irqsave(&sched->queue_lock, flags);
list_del(&sched_job->list); list_del(&sched_job->list);
atomic64_dec(&sched->hw_rq_count); atomic64_dec(&sched->hw_rq_count);
spin_unlock_irqrestore(&sched->queue_lock, flags); spin_unlock_irqrestore(&sched->queue_lock, flags);
sched->ops->process_job(sched, sched_job); sched->ops->process_job(sched, sched_job);
fence_put(&sched_job->s_fence->base);
kfree(sched_job); kfree(sched_job);
wake_up_interruptible(&sched->wait_queue); wake_up_interruptible(&sched->wait_queue);
} }
......
...@@ -45,6 +45,7 @@ struct amd_sched_entity { ...@@ -45,6 +45,7 @@ struct amd_sched_entity {
/* the virtual_seq is unique per context per ring */ /* the virtual_seq is unique per context per ring */
atomic64_t last_queued_v_seq; atomic64_t last_queued_v_seq;
atomic64_t last_emitted_v_seq; atomic64_t last_emitted_v_seq;
atomic64_t last_signaled_v_seq;
/* the job_queue maintains the jobs submitted by clients */ /* the job_queue maintains the jobs submitted by clients */
struct kfifo job_queue; struct kfifo job_queue;
spinlock_t queue_lock; spinlock_t queue_lock;
...@@ -52,6 +53,9 @@ struct amd_sched_entity { ...@@ -52,6 +53,9 @@ struct amd_sched_entity {
wait_queue_head_t wait_queue; wait_queue_head_t wait_queue;
wait_queue_head_t wait_emit; wait_queue_head_t wait_emit;
bool is_pending; bool is_pending;
uint64_t fence_context;
struct list_head fence_list;
char name[20];
}; };
/** /**
...@@ -72,14 +76,35 @@ struct amd_run_queue { ...@@ -72,14 +76,35 @@ struct amd_run_queue {
int (*check_entity_status)(struct amd_sched_entity *entity); int (*check_entity_status)(struct amd_sched_entity *entity);
}; };
/* Scheduler-level fence handed out for every job pushed to an entity. */
struct amd_sched_fence {
/* embedded kernel fence; initialised with amd_sched_fence_ops */
struct fence base;
/* callback node registered on @base by amd_sched_fence_create() */
struct fence_cb cb;
/* link used to queue the fence on the entity's fence_list */
struct list_head list;
/* entity this fence was created for */
struct amd_sched_entity *entity;
/* virtual sequence number taken from the entity's last_queued_v_seq */
uint64_t v_seq;
/* spinlock passed to fence_init() as the lock of @base */
spinlock_t lock;
};
struct amd_sched_job { struct amd_sched_job {
struct list_head list; struct list_head list;
struct fence_cb cb; struct fence_cb cb;
struct amd_gpu_scheduler *sched; struct amd_gpu_scheduler *sched;
struct amd_sched_entity *s_entity; struct amd_sched_entity *s_entity;
void *data; void *data;
struct amd_sched_fence *s_fence;
}; };
extern const struct fence_ops amd_sched_fence_ops;
static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f)
{
struct amd_sched_fence *__f = container_of(f, struct amd_sched_fence, base);
if (__f->base.ops == &amd_sched_fence_ops)
return __f;
return NULL;
}
/** /**
* Define the backend operations called by the scheduler, * Define the backend operations called by the scheduler,
* these functions should be implemented in driver side * these functions should be implemented in driver side
...@@ -126,7 +151,8 @@ int amd_sched_destroy(struct amd_gpu_scheduler *sched); ...@@ -126,7 +151,8 @@ int amd_sched_destroy(struct amd_gpu_scheduler *sched);
int amd_sched_push_job(struct amd_gpu_scheduler *sched, int amd_sched_push_job(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *c_entity, struct amd_sched_entity *c_entity,
void *data); void *data,
struct amd_sched_fence **fence);
int amd_sched_wait_emit(struct amd_sched_entity *c_entity, int amd_sched_wait_emit(struct amd_sched_entity *c_entity,
uint64_t seq, uint64_t seq,
...@@ -146,4 +172,9 @@ void amd_sched_emit(struct amd_sched_entity *c_entity, uint64_t seq); ...@@ -146,4 +172,9 @@ void amd_sched_emit(struct amd_sched_entity *c_entity, uint64_t seq);
uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity); uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity);
struct amd_sched_fence *amd_sched_fence_create(
struct amd_sched_entity *s_entity);
void amd_sched_fence_signal(struct amd_sched_fence *fence);
#endif #endif
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*
*/
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <drm/drmP.h>
#include "gpu_scheduler.h"
/**
 * amd_sched_fence_wait_cb - callback registered on a scheduler fence
 * @f: fence the callback was registered on (not used here)
 * @cb: callback node embedded in the owning amd_sched_fence
 *
 * Unlinks the scheduler fence from the list it is queued on and drops one
 * reference on it.
 */
static void amd_sched_fence_wait_cb(struct fence *f, struct fence_cb *cb)
{
	struct amd_sched_fence *s_fence;

	s_fence = container_of(cb, struct amd_sched_fence, cb);

	list_del_init(&s_fence->list);
	fence_put(&s_fence->base);
}
/**
 * amd_sched_fence_create - allocate and initialise a scheduler fence
 * @s_entity: entity the fence is created for
 *
 * Takes the next virtual sequence number from @s_entity->last_queued_v_seq,
 * initialises the embedded kernel fence with amd_sched_fence_ops, registers
 * amd_sched_fence_wait_cb() on it and queues the fence on
 * @s_entity->fence_list.
 *
 * Returns the new fence, or NULL if the allocation fails or the callback
 * cannot be registered.
 */
struct amd_sched_fence *amd_sched_fence_create(
	struct amd_sched_entity *s_entity)
{
	struct amd_sched_fence *fence;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return NULL;

	fence->v_seq = atomic64_inc_return(&s_entity->last_queued_v_seq);
	fence->entity = s_entity;
	spin_lock_init(&fence->lock);
	fence_init(&fence->base, &amd_sched_fence_ops,
		   &fence->lock,
		   s_entity->fence_context,
		   fence->v_seq);

	/* Extra reference; it is dropped again in amd_sched_fence_wait_cb(). */
	fence_get(&fence->base);

	if (fence_add_callback(&fence->base, &fence->cb,
			       amd_sched_fence_wait_cb)) {
		/* Callback will never run, so drop its reference ourselves. */
		fence_put(&fence->base);
		kfree(fence);
		return NULL;
	}

	/*
	 * Link the fence only once the callback is in place.  Previously the
	 * fence was added to the list first and then freed on the error path
	 * above without being unlinked, leaving a dangling entry on
	 * s_entity->fence_list.
	 *
	 * NOTE(review): no lock is taken around fence_list here - confirm
	 * that callers serialise fence creation per entity.
	 */
	list_add_tail(&fence->list, &s_entity->fence_list);

	return fence;
}
/**
 * amd_sched_check_ts - test whether a virtual sequence number has signaled
 * @s_entity: entity whose last_signaled_v_seq is consulted
 * @v_seq: virtual sequence number to test
 *
 * Returns true when the entity's last signaled sequence number is at or
 * beyond @v_seq.
 */
bool amd_sched_check_ts(struct amd_sched_entity *s_entity, uint64_t v_seq)
{
	uint64_t last_signaled = atomic64_read(&s_entity->last_signaled_v_seq);

	return last_signaled >= v_seq;
}
/**
 * amd_sched_fence_signal - signal a scheduler fence
 * @fence: scheduler fence to signal
 *
 * Signals the embedded kernel fence, provided the owning entity's
 * last_signaled_v_seq has already reached this fence's sequence number;
 * otherwise a warning is emitted and the fence is left untouched.
 */
void amd_sched_fence_signal(struct amd_sched_fence *fence)
{
	int ret;

	if (!amd_sched_check_ts(fence->entity, fence->v_seq)) {
		WARN(true, "fence process dismattch with job!\n");
		return;
	}

	/*
	 * fence->lock is not held at this point, so the _locked variant must
	 * not be used; fence_signal() takes the fence lock itself.
	 */
	ret = fence_signal(&fence->base);
	if (!ret)
		FENCE_TRACE(&fence->base, "signaled from irq context\n");
	else
		FENCE_TRACE(&fence->base, "was already signaled\n");
}
/**
 * amd_sched_fence_get_driver_name - fence_ops.get_driver_name hook
 * @fence: fence being queried (not used here)
 *
 * Returns the constant driver name string for scheduler fences.
 */
static const char *amd_sched_fence_get_driver_name(struct fence *fence)
{
	static const char driver_name[] = "amd_sched";

	return driver_name;
}
static const char *amd_sched_fence_get_timeline_name(struct fence *f)
{
struct amd_sched_fence *fence = to_amd_sched_fence(f);
return (const char *)fence->entity->name;
}
/**
 * amd_sched_fence_enable_signaling - fence_ops.enable_signaling hook
 * @f: kernel fence embedded in an amd_sched_fence
 *
 * Returns false when the entity has already signaled this fence's sequence
 * number, true otherwise.
 */
static bool amd_sched_fence_enable_signaling(struct fence *f)
{
	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
	bool done = amd_sched_check_ts(s_fence->entity, s_fence->v_seq);

	return !done;
}
/**
 * amd_sched_fence_is_signaled - fence_ops.signaled hook
 * @f: kernel fence embedded in an amd_sched_fence
 *
 * Returns true when the owning entity's last signaled sequence number has
 * reached this fence's sequence number.
 */
static bool amd_sched_fence_is_signaled(struct fence *f)
{
	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
	struct amd_sched_entity *owner = s_fence->entity;

	return amd_sched_check_ts(owner, s_fence->v_seq);
}
/*
 * Fence operations for scheduler fences.  Waiting uses the generic
 * fence_default_wait(); no .release hook is installed (it stays NULL).
 */
const struct fence_ops amd_sched_fence_ops = {
	.get_driver_name	= amd_sched_fence_get_driver_name,
	.get_timeline_name	= amd_sched_fence_get_timeline_name,
	.enable_signaling	= amd_sched_fence_enable_signaling,
	.signaled		= amd_sched_fence_is_signaled,
	.wait			= fence_default_wait,
};
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment