Commit 2624dd15 authored by Chunming Zhou, committed by Alex Deucher

drm/amdgpu: add timeline support in amdgpu CS v3

Syncobj wait/signal operations are appended to command submission.
v2: separate into two kinds of in/out_deps functions
v3: fix checking for timeline syncobj
Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Cc: Tobias Hector <Tobias.Hector@amd.com>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent ecc4946f
@@ -436,6 +436,12 @@ struct amdgpu_cs_chunk {
 	void		*kdata;
 };
 
+struct amdgpu_cs_post_dep {
+	struct drm_syncobj *syncobj;
+	struct dma_fence_chain *chain;
+	u64 point;
+};
+
 struct amdgpu_cs_parser {
 	struct amdgpu_device	*adev;
 	struct drm_file		*filp;
@@ -465,8 +471,8 @@ struct amdgpu_cs_parser {
 	/* user fence */
 	struct amdgpu_bo_list_entry	uf_entry;
 
-	unsigned			num_post_dep_syncobjs;
-	struct drm_syncobj		**post_dep_syncobjs;
+	unsigned			num_post_deps;
+	struct amdgpu_cs_post_dep	*post_deps;
 };
 
 static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
...
@@ -215,6 +215,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
 		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
 		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
 			break;
 
 		default:
@@ -804,9 +806,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 	ttm_eu_backoff_reservation(&parser->ticket,
 				   &parser->validated);
 
-	for (i = 0; i < parser->num_post_dep_syncobjs; i++)
-		drm_syncobj_put(parser->post_dep_syncobjs[i]);
-	kfree(parser->post_dep_syncobjs);
+	for (i = 0; i < parser->num_post_deps; i++) {
+		drm_syncobj_put(parser->post_deps[i].syncobj);
+		kfree(parser->post_deps[i].chain);
+	}
+	kfree(parser->post_deps);
 
 	dma_fence_put(parser->fence);
@@ -1117,13 +1121,18 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
 }
 
 static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
-						 uint32_t handle)
+						 uint32_t handle, u64 point,
+						 u64 flags)
 {
-	int r;
 	struct dma_fence *fence;
-	r = drm_syncobj_find_fence(p->filp, handle, 0, 0, &fence);
-	if (r)
+	int r;
+
+	r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
+	if (r) {
+		DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
+			  handle, point, r);
 		return r;
+	}
 
 	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
 	dma_fence_put(fence);
@@ -1134,46 +1143,118 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
 static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
 					    struct amdgpu_cs_chunk *chunk)
 {
+	struct drm_amdgpu_cs_chunk_sem *deps;
 	unsigned num_deps;
 	int i, r;
-	struct drm_amdgpu_cs_chunk_sem *deps;
 
 	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
 	num_deps = chunk->length_dw * 4 /
 		sizeof(struct drm_amdgpu_cs_chunk_sem);
+	for (i = 0; i < num_deps; ++i) {
+		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
+							  0, 0);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
+						     struct amdgpu_cs_chunk *chunk)
+{
+	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+	unsigned num_deps;
+	int i, r;
 
+	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
 	for (i = 0; i < num_deps; ++i) {
-		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
+		r = amdgpu_syncobj_lookup_and_add_to_sync(p,
+							  syncobj_deps[i].handle,
+							  syncobj_deps[i].point,
+							  syncobj_deps[i].flags);
 		if (r)
 			return r;
 	}
+
 	return 0;
 }
 
 static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
 					     struct amdgpu_cs_chunk *chunk)
 {
+	struct drm_amdgpu_cs_chunk_sem *deps;
 	unsigned num_deps;
 	int i;
-	struct drm_amdgpu_cs_chunk_sem *deps;
 
 	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
 	num_deps = chunk->length_dw * 4 /
 		sizeof(struct drm_amdgpu_cs_chunk_sem);
 
-	p->post_dep_syncobjs = kmalloc_array(num_deps,
-					     sizeof(struct drm_syncobj *),
-					     GFP_KERNEL);
-	p->num_post_dep_syncobjs = 0;
+	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+				     GFP_KERNEL);
+	p->num_post_deps = 0;
 
-	if (!p->post_dep_syncobjs)
+	if (!p->post_deps)
 		return -ENOMEM;
 
 	for (i = 0; i < num_deps; ++i) {
-		p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
-		if (!p->post_dep_syncobjs[i])
+		p->post_deps[i].syncobj =
+			drm_syncobj_find(p->filp, deps[i].handle);
+		if (!p->post_deps[i].syncobj)
 			return -EINVAL;
-		p->num_post_dep_syncobjs++;
+		p->post_deps[i].chain = NULL;
+		p->post_deps[i].point = 0;
+		p->num_post_deps++;
 	}
+
+	return 0;
+}
+
+static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
+						      struct amdgpu_cs_chunk *chunk)
+{
+	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+	unsigned num_deps;
+	int i;
+
+	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+
+	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+				     GFP_KERNEL);
+	p->num_post_deps = 0;
+
+	if (!p->post_deps)
+		return -ENOMEM;
+
+	for (i = 0; i < num_deps; ++i) {
+		struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
+
+		dep->chain = NULL;
+		if (syncobj_deps[i].point) {
+			dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
+			if (!dep->chain)
+				return -ENOMEM;
+		}
+
+		dep->syncobj = drm_syncobj_find(p->filp,
+						syncobj_deps[i].handle);
+		if (!dep->syncobj) {
+			kfree(dep->chain);
+			return -EINVAL;
+		}
+		dep->point = syncobj_deps[i].point;
+		p->num_post_deps++;
+	}
 
 	return 0;
 }
@@ -1187,19 +1268,33 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 		chunk = &p->chunks[i];
 
-		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES ||
-		    chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
+		switch (chunk->chunk_id) {
+		case AMDGPU_CHUNK_ID_DEPENDENCIES:
+		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
 			r = amdgpu_cs_process_fence_dep(p, chunk);
 			if (r)
 				return r;
-		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 			r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
 			if (r)
 				return r;
-		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
 			r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
 			if (r)
 				return r;
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+			r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
+			if (r)
+				return r;
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+			r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
+			if (r)
+				return r;
+			break;
 		}
 	}
@@ -1210,8 +1305,17 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
 {
 	int i;
 
-	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
-		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
+	for (i = 0; i < p->num_post_deps; ++i) {
+		if (p->post_deps[i].chain && p->post_deps[i].point) {
+			drm_syncobj_add_point(p->post_deps[i].syncobj,
+					      p->post_deps[i].chain,
+					      p->fence, p->post_deps[i].point);
+			p->post_deps[i].chain = NULL;
+		} else {
+			drm_syncobj_replace_fence(p->post_deps[i].syncobj,
+						  p->fence);
+		}
+	}
 }
 
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
...
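One design point worth calling out in the signal path above: drm_syncobj_add_point() consumes the dma_fence_chain node passed to it, which is why the chunk parser preallocates one node per nonzero timeline point and why amdgpu_cs_parser_fini() only kfree()s nodes that were never handed over. Moving the allocation to parse time means that by the time amdgpu_cs_post_dependencies() runs, signaling cannot fail with -ENOMEM. A condensed sketch of that ownership contract (simplified names, not the driver's exact code):

/* Parse time: preallocate so the later signal step cannot fail. */
struct dma_fence_chain *chain = kmalloc(sizeof(*chain), GFP_KERNEL);
if (!chain)
	return -ENOMEM;

/* Submit time: drm_syncobj_add_point() takes ownership of @chain.
 * Clearing the local pointer keeps the teardown path (which frees
 * any unconsumed chain nodes) from double-freeing it. */
drm_syncobj_add_point(syncobj, chain, fence, point);
chain = NULL;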
@@ -528,6 +528,8 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT		0x05
 #define AMDGPU_CHUNK_ID_BO_HANDLES		0x06
 #define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES	0x07
+#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT	0x08
+#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL	0x09
 
 struct drm_amdgpu_cs_chunk {
 	__u32		chunk_id;
@@ -608,6 +610,12 @@ struct drm_amdgpu_cs_chunk_sem {
 	__u32 handle;
 };
 
+struct drm_amdgpu_cs_chunk_syncobj {
+	__u32 handle;
+	__u32 flags;
+	__u64 point;
+};
+
 #define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ	0
 #define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD	1
 #define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD	2
...
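For illustration, here is a hypothetical userspace sketch (not part of the patch) showing how the new chunk IDs and struct drm_amdgpu_cs_chunk_syncobj would be appended to an amdgpu CS ioctl. The helper name is invented, the amdgpu_drm.h include path may differ with your libdrm or kernel headers install, error handling is omitted, and a real submission still carries at least one AMDGPU_CHUNK_ID_IB chunk alongside these:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/amdgpu_drm.h>	/* path may vary by install */

/* Hypothetical helper: wait on (wait_handle, wait_point) before the job
 * runs and signal (signal_handle, signal_point) with the job's fence. */
static int cs_with_timeline(int fd, uint32_t ctx_id,
			    uint32_t wait_handle, uint64_t wait_point,
			    uint32_t signal_handle, uint64_t signal_point)
{
	struct drm_amdgpu_cs_chunk_syncobj wait = {
		.handle = wait_handle,
		.flags = 0,	/* e.g. DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT */
		.point = wait_point,
	};
	struct drm_amdgpu_cs_chunk_syncobj signal = {
		.handle = signal_handle,
		.point = signal_point,
	};
	/* Each payload is described by a chunk; length_dw is in dwords. */
	struct drm_amdgpu_cs_chunk chunks[2] = {
		{
			.chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT,
			.length_dw = sizeof(wait) / 4,
			.chunk_data = (uintptr_t)&wait,
		}, {
			.chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL,
			.length_dw = sizeof(signal) / 4,
			.chunk_data = (uintptr_t)&signal,
		},
	};
	/* The CS ioctl takes an array of user pointers to the chunks. */
	uint64_t chunk_ptrs[2] = { (uintptr_t)&chunks[0],
				   (uintptr_t)&chunks[1] };
	union drm_amdgpu_cs cs;

	memset(&cs, 0, sizeof(cs));
	cs.in.ctx_id = ctx_id;
	cs.in.num_chunks = 2;
	cs.in.chunks = (uintptr_t)chunk_ptrs;

	return ioctl(fd, DRM_IOCTL_AMDGPU_CS, &cs);
}

With the patch applied, the wait chunk makes the kernel resolve the fence at wait_point via drm_syncobj_find_fence() before scheduling, and the signal chunk attaches the job's fence to the syncobj timeline at signal_point via drm_syncobj_add_point() (or replaces the fence when point is 0).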