Commit a88b6e4c authored by Chris Wilson's avatar Chris Wilson

drm/i915: Allow specification of parallel execbuf

There is a desire to split a task onto two engines and have them run at
the same time, e.g. scanline interleaving to spread the workload evenly.
Through the use of the out-fence from the first execbuf, we can
coordinate secondary execbuf to only become ready simultaneously with
the first, so that with all things idle the second execbufs are executed
in parallel with the first. The key difference here between the new
EXEC_FENCE_SUBMIT and the existing EXEC_FENCE_IN is that the in-fence
waits for the completion of the first request (so that all of its
rendering results are visible to the second execbuf, the more common
userspace fence requirement).

Since we only have a single input fence slot, userspace cannot mix an
in-fence and a submit-fence. It has to use one or the other! This is not
such a harsh requirement, since by virtue of the submit-fence, the
secondary execbuf inherit all of the dependencies from the first
request, and for the application the dependencies should be common
between the primary and secondary execbuf.
Suggested-by: default avatarTvrtko Ursulin <tvrtko.ursulin@intel.com>
Testcase: igt/gem_exec_fence/parallel
Link: https://github.com/intel/media-driver/pull/546Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: default avatarTvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-10-chris@chris-wilson.co.uk
parent ee113690
...@@ -443,6 +443,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, ...@@ -443,6 +443,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
case I915_PARAM_HAS_EXEC_CAPTURE: case I915_PARAM_HAS_EXEC_CAPTURE:
case I915_PARAM_HAS_EXEC_BATCH_FIRST: case I915_PARAM_HAS_EXEC_BATCH_FIRST:
case I915_PARAM_HAS_EXEC_FENCE_ARRAY: case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
/* For the time being all of these are always true; /* For the time being all of these are always true;
* if some supported hardware does not have one of these * if some supported hardware does not have one of these
* features this value needs to be provided from * features this value needs to be provided from
......
...@@ -2318,6 +2318,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, ...@@ -2318,6 +2318,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
{ {
struct i915_execbuffer eb; struct i915_execbuffer eb;
struct dma_fence *in_fence = NULL; struct dma_fence *in_fence = NULL;
struct dma_fence *exec_fence = NULL;
struct sync_file *out_fence = NULL; struct sync_file *out_fence = NULL;
int out_fence_fd = -1; int out_fence_fd = -1;
int err; int err;
...@@ -2360,11 +2361,24 @@ i915_gem_do_execbuffer(struct drm_device *dev, ...@@ -2360,11 +2361,24 @@ i915_gem_do_execbuffer(struct drm_device *dev,
return -EINVAL; return -EINVAL;
} }
if (args->flags & I915_EXEC_FENCE_SUBMIT) {
if (in_fence) {
err = -EINVAL;
goto err_in_fence;
}
exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
if (!exec_fence) {
err = -EINVAL;
goto err_in_fence;
}
}
if (args->flags & I915_EXEC_FENCE_OUT) { if (args->flags & I915_EXEC_FENCE_OUT) {
out_fence_fd = get_unused_fd_flags(O_CLOEXEC); out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
if (out_fence_fd < 0) { if (out_fence_fd < 0) {
err = out_fence_fd; err = out_fence_fd;
goto err_in_fence; goto err_exec_fence;
} }
} }
...@@ -2494,6 +2508,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, ...@@ -2494,6 +2508,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_request; goto err_request;
} }
if (exec_fence) {
err = i915_request_await_execution(eb.request, exec_fence,
eb.engine->bond_execute);
if (err < 0)
goto err_request;
}
if (fences) { if (fences) {
err = await_fence_array(&eb, fences); err = await_fence_array(&eb, fences);
if (err) if (err)
...@@ -2555,6 +2576,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, ...@@ -2555,6 +2576,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
err_out_fence: err_out_fence:
if (out_fence_fd != -1) if (out_fence_fd != -1)
put_unused_fd(out_fence_fd); put_unused_fd(out_fence_fd);
err_exec_fence:
dma_fence_put(exec_fence);
err_in_fence: err_in_fence:
dma_fence_put(in_fence); dma_fence_put(in_fence);
return err; return err;
......
...@@ -604,6 +604,12 @@ typedef struct drm_i915_irq_wait { ...@@ -604,6 +604,12 @@ typedef struct drm_i915_irq_wait {
*/ */
#define I915_PARAM_MMAP_GTT_COHERENT 52 #define I915_PARAM_MMAP_GTT_COHERENT 52
/*
* Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel
* execution through use of explicit fence support.
* See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
*/
#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
/* Must be kept compact -- no holes and well documented */ /* Must be kept compact -- no holes and well documented */
typedef struct drm_i915_getparam { typedef struct drm_i915_getparam {
...@@ -1126,7 +1132,16 @@ struct drm_i915_gem_execbuffer2 { ...@@ -1126,7 +1132,16 @@ struct drm_i915_gem_execbuffer2 {
*/ */
#define I915_EXEC_FENCE_ARRAY (1<<19) #define I915_EXEC_FENCE_ARRAY (1<<19)
#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1)) /*
* Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represent
* a sync_file fd to wait upon (in a nonblocking manner) prior to executing
* the batch.
*
* Returns -EINVAL if the sync_file fd cannot be found.
*/
#define I915_EXEC_FENCE_SUBMIT (1 << 20)
#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \ #define i915_execbuffer2_set_context_id(eb2, context) \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment