drm/amdgpu: fix pipeline sync v2

This fixes a potential memory leak of dma_fence objects in the CS code as well as glitches in firefox because of missing pipeline sync. v2: use the scheduler instead of the fence context Signed-off-by: Christian König <christian.koenig@amd.com> Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2323 Tested-by: Michal Kubecek mkubecek@suse.cz Tested-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20230109130120.73389-1-christian.koenig@amd.com

drm/amdgpu: fix pipeline sync v2
This fixes a potential memory leak of dma_fence objects in the CS code as well as glitches in firefox because of missing pipeline sync. v2: use the scheduler instead of the fence context Signed-off-by: Christian König <christian.koenig@amd.com> Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2323 Tested-by: Michal Kubecek mkubecek@suse.cz Tested-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20230109130120.73389-1-christian.koenig@amd.com
3bd68b32 · Christian König · a309c719 · 3bd68b32
Commit 3bd68b32 authored Jan 09, 2023 by Christian König
Show whitespace changes
Inline Side-by-side

Showing with 30 additions and 16 deletions

drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +30 -16

No files found.
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -61,6 +61,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
 		amdgpu_ctx_put(p->ctx);
 		return -ECANCELED;
 	}
+
+	amdgpu_sync_create(&p->sync);
 	return 0;
 }

@@ -452,18 +454,6 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
 	}

 	r = amdgpu_sync_fence(&p->sync, fence);
-	if (r)
-		goto error;
-
-	/*
-	 * When we have an explicit dependency it might be necessary to insert a
-	 * pipeline sync to make sure that all caches etc are flushed and the
-	 * next job actually sees the results from the previous one.
-	 */
-	if (fence->context == p->gang_leader->base.entity->fence_context)
-		r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
-
-error:
 	dma_fence_put(fence);
 	return r;
 }
@@ -1188,10 +1178,19 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct drm_gpu_scheduler *sched;
 	struct amdgpu_bo_list_entry *e;
+	struct dma_fence *fence;
 	unsigned int i;
 	int r;

+	r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
+	if (r) {
+		if (r != -ERESTARTSYS)
+			DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
+		return r;
+	}
+
 	list_for_each_entry(e, &p->validated, tv.head) {
 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 		struct dma_resv *resv = bo->tbo.base.resv;
@@ -1211,10 +1210,24 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 			return r;
 	}

-	r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
-	if (r && r != -ERESTARTSYS)
-		DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
+	sched = p->gang_leader->base.entity->rq->sched;
+	while ((fence = amdgpu_sync_get_fence(&p->sync))) {
+		struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
+
+		/*
+		 * When we have an dependency it might be necessary to insert a
+		 * pipeline sync to make sure that all caches etc are flushed and the
+		 * next job actually sees the results from the previous one
+		 * before we start executing on the same scheduler ring.
+		 */
+		if (!s_fence || s_fence->sched != sched)
+			continue;
+
+		r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
+		if (r)
 			return r;
+	}
+	return 0;
 }

 static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
@@ -1347,6 +1360,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
 {
 	unsigned i;

+	amdgpu_sync_free(&parser->sync);
 	for (i = 0; i < parser->num_post_deps; i++) {
 		drm_syncobj_put(parser->post_deps[i].syncobj);
 		kfree(parser->post_deps[i].chain);