Commit eca13f3c authored by Christian König

drm/amdgpu: use the last IB as gang leader v2

It turned out that the gang leader is not the last IB specified,
but instead the last job allocated.

This is a bit unfortunate and not very intuitive for the CS
interface, so try to fix this.
Signed-off-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221115094206.6181-1-christian.koenig@amd.com
Tested-by: Timur Kristóf <timur.kristof@gmail.com>
Acked-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Fixes: 4624459c ("drm/amdgpu: add gang submit frontend v6")
parent e17a025a
...@@ -109,6 +109,7 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p, ...@@ -109,6 +109,7 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
return r; return r;
++(num_ibs[r]); ++(num_ibs[r]);
p->gang_leader_idx = r;
return 0; return 0;
} }
...@@ -300,7 +301,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, ...@@ -300,7 +301,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
if (ret) if (ret)
goto free_all_kdata; goto free_all_kdata;
} }
p->gang_leader = p->jobs[p->gang_size - 1]; p->gang_leader = p->jobs[p->gang_leader_idx];
if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) { if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) {
ret = -ECANCELED; ret = -ECANCELED;
...@@ -1194,16 +1195,18 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) ...@@ -1194,16 +1195,18 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
return r; return r;
} }
for (i = 0; i < p->gang_size - 1; ++i) { for (i = 0; i < p->gang_size; ++i) {
if (p->jobs[i] == leader)
continue;
r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync); r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync);
if (r) if (r)
return r; return r;
} }
r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]); r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
if (r && r != -ERESTARTSYS) if (r && r != -ERESTARTSYS)
DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
return r; return r;
} }
...@@ -1237,9 +1240,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, ...@@ -1237,9 +1240,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
for (i = 0; i < p->gang_size; ++i) for (i = 0; i < p->gang_size; ++i)
drm_sched_job_arm(&p->jobs[i]->base); drm_sched_job_arm(&p->jobs[i]->base);
for (i = 0; i < (p->gang_size - 1); ++i) { for (i = 0; i < p->gang_size; ++i) {
struct dma_fence *fence; struct dma_fence *fence;
if (p->jobs[i] == leader)
continue;
fence = &p->jobs[i]->base.s_fence->scheduled; fence = &p->jobs[i]->base.s_fence->scheduled;
r = amdgpu_sync_fence(&leader->sync, fence); r = amdgpu_sync_fence(&leader->sync, fence);
if (r) if (r)
...@@ -1275,7 +1281,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, ...@@ -1275,7 +1281,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
list_for_each_entry(e, &p->validated, tv.head) { list_for_each_entry(e, &p->validated, tv.head) {
/* Everybody except for the gang leader uses READ */ /* Everybody except for the gang leader uses READ */
for (i = 0; i < (p->gang_size - 1); ++i) { for (i = 0; i < p->gang_size; ++i) {
if (p->jobs[i] == leader)
continue;
dma_resv_add_fence(e->tv.bo->base.resv, dma_resv_add_fence(e->tv.bo->base.resv,
&p->jobs[i]->base.s_fence->finished, &p->jobs[i]->base.s_fence->finished,
DMA_RESV_USAGE_READ); DMA_RESV_USAGE_READ);
...@@ -1285,7 +1294,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, ...@@ -1285,7 +1294,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
e->tv.num_shared = 0; e->tv.num_shared = 0;
} }
seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1], seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
p->fence); p->fence);
amdgpu_cs_post_dependencies(p); amdgpu_cs_post_dependencies(p);
......
...@@ -54,6 +54,7 @@ struct amdgpu_cs_parser { ...@@ -54,6 +54,7 @@ struct amdgpu_cs_parser {
/* scheduler job objects */ /* scheduler job objects */
unsigned int gang_size; unsigned int gang_size;
unsigned int gang_leader_idx;
struct drm_sched_entity *entities[AMDGPU_CS_GANG_SIZE]; struct drm_sched_entity *entities[AMDGPU_CS_GANG_SIZE];
struct amdgpu_job *jobs[AMDGPU_CS_GANG_SIZE]; struct amdgpu_job *jobs[AMDGPU_CS_GANG_SIZE];
struct amdgpu_job *gang_leader; struct amdgpu_job *gang_leader;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment