Commit 507d977f authored by Chris Wilson

drm/i915: Pass vma to relocate entry

We can simplify our tracking of pending writes in an execbuf to the
single bit in the vma->exec_entry->flags, but that requires the
relocation function to know the object's vma. Pass it along.
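
In concrete terms (an editor's sketch distilled from the diff below, not the patch itself; every identifier is quoted from the hunks that follow), the bookkeeping moves from per-object pending domains to a single flag bit that later stages test:

    /* Editor's sketch, quoted from the diff below -- not buildable on its own. */

    /* Before: writes accumulated in per-object pending domains. */
    target_obj->pending_read_domains |= reloc->read_domains;
    target_obj->pending_write_domain |= reloc->write_domain;

    /* After: a single bit on the vma's execbuf entry records the write... */
    if (reloc->write_domain)
            target->exec_entry->flags |= EXEC_OBJECT_WRITE;

    /* ...and consumers test that bit, e.g. when awaiting the object: */
    ret = i915_gem_request_await_object(eb->request, obj,
                                        vma->exec_entry->flags & EXEC_OBJECT_WRITE);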

Note we have only been using a single bit to track flushing since

commit cc889e0f
Author: Daniel Vetter <daniel.vetter@ffwll.ch>
Date:   Wed Jun 13 20:45:19 2012 +0200

    drm/i915: disable flushing_list/gpu_write_list

unconditionally flushed all render caches before the breadcrumb and

commit 6ac42f41
Author: Daniel Vetter <daniel.vetter@ffwll.ch>
Date:   Sat Jul 21 12:25:01 2012 +0200

    drm/i915: Replace the complex flushing logic with simple invalidate/flush all

did away with the explicit GPU domain tracking. This was then codified
into the ABI with NO_RELOC in

commit ed5982e6
Author: Daniel Vetter <daniel.vetter@ffwll.ch> # Oi! Patch stealer!
Date:   Thu Jan 17 22:23:36 2013 +0100

    drm/i915: Allow userspace to hint that the relocations were known
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
parent 4ff4b44c
@@ -622,42 +622,25 @@ relocate_entry(struct drm_i915_gem_object *obj,
 }
 
 static int
-eb_relocate_entry(struct drm_i915_gem_object *obj,
+eb_relocate_entry(struct i915_vma *vma,
                   struct i915_execbuffer *eb,
                   struct drm_i915_gem_relocation_entry *reloc)
 {
-        struct drm_gem_object *target_obj;
-        struct drm_i915_gem_object *target_i915_obj;
-        struct i915_vma *target_vma;
-        uint64_t target_offset;
+        struct i915_vma *target;
+        u64 target_offset;
         int ret;
 
         /* we've already hold a reference to all valid objects */
-        target_vma = eb_get_vma(eb, reloc->target_handle);
-        if (unlikely(target_vma == NULL))
+        target = eb_get_vma(eb, reloc->target_handle);
+        if (unlikely(!target))
                 return -ENOENT;
-        target_i915_obj = target_vma->obj;
-        target_obj = &target_vma->obj->base;
-
-        target_offset = gen8_canonical_addr(target_vma->node.start);
-
-        /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
-         * pipe_control writes because the gpu doesn't properly redirect them
-         * through the ppgtt for non_secure batchbuffers. */
-        if (unlikely(IS_GEN6(eb->i915) &&
-                     reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
-                ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
-                                    PIN_GLOBAL);
-                if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
-                        return ret;
-        }
 
         /* Validate that the target is in a valid r/w GPU domain */
         if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
                 DRM_DEBUG("reloc with multiple write domains: "
-                          "obj %p target %d offset %d "
+                          "target %d offset %d "
                           "read %08x write %08x",
-                          obj, reloc->target_handle,
+                          reloc->target_handle,
                           (int) reloc->offset,
                           reloc->read_domains,
                           reloc->write_domain);
@@ -666,43 +649,57 @@ eb_relocate_entry(struct drm_i915_gem_object *obj,
         if (unlikely((reloc->write_domain | reloc->read_domains)
                      & ~I915_GEM_GPU_DOMAINS)) {
                 DRM_DEBUG("reloc with read/write non-GPU domains: "
-                          "obj %p target %d offset %d "
+                          "target %d offset %d "
                           "read %08x write %08x",
-                          obj, reloc->target_handle,
+                          reloc->target_handle,
                           (int) reloc->offset,
                           reloc->read_domains,
                           reloc->write_domain);
                 return -EINVAL;
         }
 
-        target_obj->pending_read_domains |= reloc->read_domains;
-        target_obj->pending_write_domain |= reloc->write_domain;
+        if (reloc->write_domain)
+                target->exec_entry->flags |= EXEC_OBJECT_WRITE;
+
+        /*
+         * Sandybridge PPGTT errata: We need a global gtt mapping for MI and
+         * pipe_control writes because the gpu doesn't properly redirect them
+         * through the ppgtt for non_secure batchbuffers.
+         */
+        if (unlikely(IS_GEN6(eb->i915) &&
+                     reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
+                ret = i915_vma_bind(target, target->obj->cache_level,
+                                    PIN_GLOBAL);
+                if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
+                        return ret;
+        }
 
         /* If the relocation already has the right value in it, no
          * more work needs to be done.
          */
+        target_offset = gen8_canonical_addr(target->node.start);
         if (target_offset == reloc->presumed_offset)
                 return 0;
 
         /* Check that the relocation address is valid... */
         if (unlikely(reloc->offset >
-                     obj->base.size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
+                     vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
                 DRM_DEBUG("Relocation beyond object bounds: "
-                          "obj %p target %d offset %d size %d.\n",
-                          obj, reloc->target_handle,
-                          (int) reloc->offset,
-                          (int) obj->base.size);
+                          "target %d offset %d size %d.\n",
+                          reloc->target_handle,
+                          (int)reloc->offset,
+                          (int)vma->size);
                 return -EINVAL;
         }
         if (unlikely(reloc->offset & 3)) {
                 DRM_DEBUG("Relocation not 4-byte aligned: "
-                          "obj %p target %d offset %d.\n",
-                          obj, reloc->target_handle,
-                          (int) reloc->offset);
+                          "target %d offset %d.\n",
+                          reloc->target_handle,
+                          (int)reloc->offset);
                 return -EINVAL;
         }
 
-        ret = relocate_entry(obj, reloc, &eb->reloc_cache, target_offset);
+        ret = relocate_entry(vma->obj, reloc, &eb->reloc_cache, target_offset);
         if (ret)
                 return ret;
 
@@ -748,7 +745,7 @@ static int eb_relocate_vma(struct i915_vma *vma, struct i915_execbuffer *eb)
                 do {
                         u64 offset = r->presumed_offset;
 
-                        ret = eb_relocate_entry(vma->obj, eb, r);
+                        ret = eb_relocate_entry(vma, eb, r);
                         if (ret)
                                 goto out;
 
@@ -794,7 +791,7 @@ eb_relocate_vma_slow(struct i915_vma *vma,
         int i, ret = 0;
 
         for (i = 0; i < entry->relocation_count; i++) {
-                ret = eb_relocate_entry(vma->obj, eb, &relocs[i]);
+                ret = eb_relocate_entry(vma, eb, &relocs[i]);
                 if (ret)
                         break;
         }
@@ -827,7 +824,6 @@ eb_reserve_vma(struct i915_vma *vma,
                struct intel_engine_cs *engine,
                bool *need_reloc)
 {
-        struct drm_i915_gem_object *obj = vma->obj;
         struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
         uint64_t flags;
         int ret;
@@ -881,11 +877,6 @@ eb_reserve_vma(struct i915_vma *vma,
                 *need_reloc = true;
         }
 
-        if (entry->flags & EXEC_OBJECT_WRITE) {
-                obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
-                obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
-        }
-
         return 0;
 }
 
@@ -948,7 +939,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
 {
         const bool has_fenced_gpu_access = INTEL_GEN(eb->i915) < 4;
         const bool needs_unfenced_map = INTEL_INFO(eb->i915)->unfenced_needs_alignment;
-        struct drm_i915_gem_object *obj;
         struct i915_vma *vma;
         struct list_head ordered_vmas;
         struct list_head pinned_vmas;
@@ -961,7 +951,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
                 bool need_fence, need_mappable;
 
                 vma = list_first_entry(&eb->vmas, struct i915_vma, exec_link);
-                obj = vma->obj;
                 entry = vma->exec_entry;
 
                 if (eb->ctx->flags & CONTEXT_NO_ZEROMAP)
@@ -982,9 +971,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
                         list_move(&vma->exec_link, &ordered_vmas);
                 } else
                         list_move_tail(&vma->exec_link, &ordered_vmas);
-
-                obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
-                obj->base.pending_write_domain = 0;
         }
         list_splice(&ordered_vmas, &eb->vmas);
         list_splice(&pinned_vmas, &eb->vmas);
@@ -1170,7 +1156,7 @@ eb_move_to_gpu(struct i915_execbuffer *eb)
                         i915_gem_clflush_object(obj, 0);
 
                 ret = i915_gem_request_await_object
-                        (eb->request, obj, obj->base.pending_write_domain);
+                        (eb->request, obj, vma->exec_entry->flags & EXEC_OBJECT_WRITE);
                 if (ret)
                         return ret;
         }
@@ -1366,12 +1352,10 @@ eb_move_to_active(struct i915_execbuffer *eb)
         list_for_each_entry(vma, &eb->vmas, exec_link) {
                 struct drm_i915_gem_object *obj = vma->obj;
 
-                obj->base.write_domain = obj->base.pending_write_domain;
-                if (obj->base.write_domain)
-                        vma->exec_entry->flags |= EXEC_OBJECT_WRITE;
-                else
-                        obj->base.pending_read_domains |= obj->base.read_domains;
-                obj->base.read_domains = obj->base.pending_read_domains;
+                obj->base.write_domain = 0;
+                if (vma->exec_entry->flags & EXEC_OBJECT_WRITE)
+                        obj->base.read_domains = 0;
+                obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
 
                 i915_vma_move_to_active(vma, eb->request, vma->exec_entry->flags);
                 eb_export_fence(obj, eb->request, vma->exec_entry->flags);
@@ -1681,8 +1665,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
                 goto err;
         }
 
-        /* Set the pending read domains for the batch buffer to COMMAND */
-        if (eb.batch->obj->base.pending_write_domain) {
+        if (eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE) {
                 DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
                 ret = -EINVAL;
                 goto err;
@@ -1719,7 +1702,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
                 }
         }
 
-        eb.batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
-
         if (eb.batch_len == 0)
                 eb.batch_len = eb.batch->size - eb.batch_start_offset;