Commit 977f7e10 authored by Nirmoy Das, committed by Alex Deucher

drm/amdgpu: allocate entities on demand

Currently we pre-allocate entities and fences for all the HW IPs at
context creation, even though some of them might never be used.

This patch avoids that waste by allocating an entity and its fences
only when they are first needed.

v2: allocate memory for entity and fences together
Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 18c6b74e
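The v2 note above refers to replacing two separate allocations (the entity array plus each entity's fence array) with a single allocation that ends in a flexible array member, sized via offsetof(). Below is a minimal standalone C sketch of that pattern; struct fence, ctx_entity and entity_alloc are simplified stand-ins for illustration, not the driver's types or API:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct fence;				/* opaque stand-in for struct dma_fence */

struct ctx_entity {
	unsigned long sequence;
	struct fence *fences[];		/* flexible array member, must be last */
};

static struct ctx_entity *entity_alloc(unsigned int num_fences)
{
	/*
	 * One zeroed allocation covers the struct header plus num_fences
	 * pointer slots. offsetof() with a runtime index relies on the
	 * GCC/Clang __builtin_offsetof extension, the same one the patch
	 * uses with fences[amdgpu_sched_jobs].
	 */
	struct ctx_entity *e = calloc(1, offsetof(struct ctx_entity,
						  fences[num_fences]));

	if (!e)
		return NULL;
	e->sequence = 1;
	return e;
}

int main(void)
{
	struct ctx_entity *e = entity_alloc(32);	/* 32 fence slots */

	if (!e)
		return 1;
	printf("sequence = %lu, fences[0] = %p\n", e->sequence,
	       (void *)e->fences[0]);
	free(e);	/* a single free() releases entity and fence slots */
	return 0;
}

The payoff is one failure path and one kfree() instead of two, which is what lets the new amdgpu_ctx_fini_entity() below release everything with a single call.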
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
 	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
 };
 
-static int amdgpu_ctx_total_num_entities(void)
-{
-	unsigned i, num_entities = 0;
-
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
-		num_entities += amdgpu_ctx_num_entities[i];
-
-	return num_entities;
-}
-
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 				      enum drm_sched_priority priority)
 {
+	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+		return -EINVAL;
+
 	/* NORMAL and below are accessible by everyone */
 	if (priority <= DRM_SCHED_PRIORITY_NORMAL)
 		return 0;
@@ -68,64 +61,24 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 	return -EACCES;
 }
 
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
-			   enum drm_sched_priority priority,
-			   struct drm_file *filp,
-			   struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
-	unsigned i, j;
+	struct amdgpu_device *adev = ctx->adev;
+	struct amdgpu_ctx_entity *entity;
+	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
+	unsigned num_scheds = 0;
+	enum drm_sched_priority priority;
 	int r;
 
-	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
-		return -EINVAL;
-
-	r = amdgpu_ctx_priority_permit(filp, priority);
-	if (r)
-		return r;
-
-	memset(ctx, 0, sizeof(*ctx));
-	ctx->adev = adev;
-
-	ctx->entities[0] = kcalloc(num_entities,
-				   sizeof(struct amdgpu_ctx_entity),
-				   GFP_KERNEL);
-	if (!ctx->entities[0])
-		return -ENOMEM;
-
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
-		entity->sequence = 1;
-		entity->fences = kcalloc(amdgpu_sched_jobs,
-					 sizeof(struct dma_fence*), GFP_KERNEL);
-		if (!entity->fences) {
-			r = -ENOMEM;
-			goto error_cleanup_memory;
-		}
-	}
-	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
-		ctx->entities[i] = ctx->entities[i - 1] +
-			amdgpu_ctx_num_entities[i - 1];
-
-	kref_init(&ctx->refcount);
-	spin_lock_init(&ctx->ring_lock);
-	mutex_init(&ctx->lock);
-
-	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
-	ctx->reset_counter_query = ctx->reset_counter;
-	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
-	ctx->init_priority = priority;
-	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
-
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-		struct drm_gpu_scheduler **scheds;
-		struct drm_gpu_scheduler *sched;
-		unsigned num_scheds = 0;
+	entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]),
+			 GFP_KERNEL);
+	if (!entity)
+		return -ENOMEM;
 
-		switch (i) {
+	entity->sequence = 1;
+	priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+				ctx->init_priority : ctx->override_priority;
+	switch (hw_ip) {
 		case AMDGPU_HW_IP_GFX:
 			sched = &adev->gfx.gfx_ring[0].sched;
 			scheds = &sched;
@@ -166,63 +119,90 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			scheds = adev->jpeg.jpeg_sched;
 			num_scheds = adev->jpeg.num_jpeg_sched;
 			break;
-		}
-
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-						  priority, scheds,
-						  num_scheds, &ctx->guilty);
-		if (r)
-			goto error_cleanup_entities;
 	}
 
+	r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
+				  &ctx->guilty);
+	if (r)
+		goto error_free_entity;
+
+	ctx->entities[hw_ip][ring] = entity;
 	return 0;
 
-error_cleanup_entities:
-	for (i = 0; i < num_entities; ++i)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+error_free_entity:
+	kfree(entity);
 
-error_cleanup_memory:
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+	return r;
+}
 
-		kfree(entity->fences);
-		entity->fences = NULL;
-	}
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+			   enum drm_sched_priority priority,
+			   struct drm_file *filp,
+			   struct amdgpu_ctx *ctx)
+{
+	int r;
 
-	kfree(ctx->entities[0]);
-	ctx->entities[0] = NULL;
-	return r;
+	r = amdgpu_ctx_priority_permit(filp, priority);
+	if (r)
+		return r;
+
+	memset(ctx, 0, sizeof(*ctx));
+
+	ctx->adev = adev;
+
+	kref_init(&ctx->refcount);
+	spin_lock_init(&ctx->ring_lock);
+	mutex_init(&ctx->lock);
+
+	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+	ctx->reset_counter_query = ctx->reset_counter;
+	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+	ctx->init_priority = priority;
+	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+
+	return 0;
+}
+
+static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+{
+	int i;
+
+	if (!entity)
+		return;
+
+	for (i = 0; i < amdgpu_sched_jobs; ++i)
+		dma_fence_put(entity->fences[i]);
+
+	kfree(entity);
 }
 
 static void amdgpu_ctx_fini(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_device *adev = ctx->adev;
 	unsigned i, j;
 
 	if (!adev)
 		return;
 
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
-		for (j = 0; j < amdgpu_sched_jobs; ++j)
-			dma_fence_put(entity->fences[j]);
-
-		kfree(entity->fences);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
+			amdgpu_ctx_fini_entity(ctx->entities[i][j]);
+			ctx->entities[i][j] = NULL;
+		}
 	}
 
-	kfree(ctx->entities[0]);
 	mutex_destroy(&ctx->lock);
 	kfree(ctx);
 }
 
 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 			  u32 ring, struct drm_sched_entity **entity)
 {
+	int r;
+
 	if (hw_ip >= AMDGPU_HW_IP_NUM) {
 		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
 		return -EINVAL;
@@ -239,7 +219,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 		return -EINVAL;
 	}
 
-	*entity = &ctx->entities[hw_ip][ring].entity;
+	if (ctx->entities[hw_ip][ring] == NULL) {
+		r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+		if (r)
+			return r;
+	}
+
+	*entity = &ctx->entities[hw_ip][ring]->entity;
 	return 0;
 }
 
@@ -279,14 +265,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx;
-	unsigned num_entities;
-	u32 i;
+	u32 i, j;
 
 	ctx = container_of(ref, struct amdgpu_ctx, refcount);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			if (!ctx->entities[i][j])
+				continue;
 
-	num_entities = amdgpu_ctx_total_num_entities();
-	for (i = 0; i < num_entities; i++)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+			drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+		}
+	}
 
 	amdgpu_ctx_fini(ref);
 }
@@ -516,19 +505,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 				  enum drm_sched_priority priority)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	enum drm_sched_priority ctx_prio;
-	unsigned i;
+	unsigned i, j;
 
 	ctx->override_priority = priority;
 
 	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
 			ctx->init_priority : ctx->override_priority;
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			struct drm_sched_entity *entity;
 
-	for (i = 0; i < num_entities; i++) {
-		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
+			if (!ctx->entities[i][j])
+				continue;
 
-		drm_sched_entity_set_priority(entity, ctx_prio);
+			entity = &ctx->entities[i][j]->entity;
+			drm_sched_entity_set_priority(entity, ctx_prio);
+		}
 	}
 }
 
@@ -564,20 +557,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;
 
 	idp = &mgr->ctx_handles;
 
 	mutex_lock(&mgr->lock);
 	idr_for_each_entry(idp, ctx, id) {
-		for (i = 0; i < num_entities; i++) {
-			struct drm_sched_entity *entity;
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;
 
-			entity = &ctx->entities[0][i].entity;
-			timeout = drm_sched_entity_flush(entity, timeout);
+				if (!ctx->entities[i][j])
+					continue;
+
+				entity = &ctx->entities[i][j]->entity;
+				timeout = drm_sched_entity_flush(entity, timeout);
+			}
 		}
 	}
 	mutex_unlock(&mgr->lock);
@@ -586,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;
 
 	idp = &mgr->ctx_handles;
@@ -599,8 +595,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 			continue;
 		}
 
-		for (i = 0; i < num_entities; i++)
-			drm_sched_entity_fini(&ctx->entities[0][i].entity);
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;
+
+				if (!ctx->entities[i][j])
+					continue;
+
+				entity = &ctx->entities[i][j]->entity;
+				drm_sched_entity_fini(entity);
+			}
+		}
 	}
 }
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -29,10 +29,12 @@ struct drm_device;
 struct drm_file;
 struct amdgpu_fpriv;
 
+#define AMDGPU_MAX_ENTITY_NUM 4
+
 struct amdgpu_ctx_entity {
 	uint64_t		sequence;
-	struct dma_fence	**fences;
 	struct drm_sched_entity	entity;
+	struct dma_fence	*fences[];
 };
 
 struct amdgpu_ctx {
@@ -42,7 +44,7 @@ struct amdgpu_ctx {
 	unsigned		reset_counter_query;
 	uint32_t		vram_lost_counter;
 	spinlock_t		ring_lock;
-	struct amdgpu_ctx_entity	*entities[AMDGPU_HW_IP_NUM];
+	struct amdgpu_ctx_entity	*entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
 	bool			preamble_presented;
 	enum drm_sched_priority	init_priority;
 	enum drm_sched_priority	override_priority;
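The other half of the change is the lazy-initialization pattern in amdgpu_ctx_get_entity(): each entities[hw_ip][ring] slot starts out NULL and amdgpu_ctx_init_entity() runs only on the first lookup. A minimal sketch of that idea, using simplified hypothetical types (struct ctx, struct entity; bounds and error codes are placeholders, not the driver's):

#include <stdio.h>
#include <stdlib.h>

#define HW_IP_NUM	9	/* placeholder for AMDGPU_HW_IP_NUM */
#define MAX_ENTITY_NUM	4	/* placeholder for AMDGPU_MAX_ENTITY_NUM */

struct entity {
	int ready;
};

struct ctx {
	/* Every slot starts out NULL; nothing is allocated up front. */
	struct entity *entities[HW_IP_NUM][MAX_ENTITY_NUM];
};

/* Stand-in for amdgpu_ctx_init_entity(): allocate a slot on first use. */
static int ctx_init_entity(struct ctx *c, unsigned int hw_ip, unsigned int ring)
{
	struct entity *e = calloc(1, sizeof(*e));

	if (!e)
		return -1;
	e->ready = 1;
	c->entities[hw_ip][ring] = e;
	return 0;
}

/* Stand-in for amdgpu_ctx_get_entity(): validate, then create lazily. */
static int ctx_get_entity(struct ctx *c, unsigned int hw_ip, unsigned int ring,
			  struct entity **out)
{
	if (hw_ip >= HW_IP_NUM || ring >= MAX_ENTITY_NUM)
		return -1;

	if (c->entities[hw_ip][ring] == NULL) {
		int r = ctx_init_entity(c, hw_ip, ring);

		if (r)
			return r;
	}

	*out = c->entities[hw_ip][ring];
	return 0;
}

int main(void)
{
	struct ctx c = { 0 };
	struct entity *e;

	/* The first lookup allocates; later lookups reuse the same entity. */
	if (ctx_get_entity(&c, 0, 0, &e) == 0)
		printf("entity ready: %d\n", e->ready);
	return 0;
}

With this shape, a context only pays for entities on the HW IPs it actually submits to, which is exactly the wastage the commit message describes; the teardown paths then have to skip NULL slots, as the loops in amdgpu_ctx_fini() and amdgpu_ctx_mgr_entity_fini() above do.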