Commit e6a84468 authored by Chris Wilson's avatar Chris Wilson Committed by Daniel Vetter

drm/i915: Force CPU relocations if not GTT mapped

Move the decision on whether we need to have a mappable object during
execbuffer to the fore and then reuse that decision by propagating the
flag through to reservation. As a corollary, before doing the actual
relocation through the GTT, we can make sure that we do have a GTT
mapping through which to operate.

Note that the key to make this work is to ditch the
obj->map_and_fenceable unbind optimization - with full ppgtt it
doesn't make a lot of sense any more anyway.

v2: Revamp and resend to ease future patches.
v3: Refresh patch rationale

References: https://bugs.freedesktop.org/show_bug.cgi?id=81094Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
[danvet: Explain why obj->map_and_fenceable is key and split out the
secure batch fix.]
Signed-off-by: default avatarDaniel Vetter <daniel.vetter@ffwll.ch>
parent dc8cd1e7
...@@ -2928,9 +2928,8 @@ int i915_vma_unbind(struct i915_vma *vma) ...@@ -2928,9 +2928,8 @@ int i915_vma_unbind(struct i915_vma *vma)
vma->unbind_vma(vma); vma->unbind_vma(vma);
list_del_init(&vma->mm_list); list_del_init(&vma->mm_list);
/* Avoid an unnecessary call to unbind on rebind. */
if (i915_is_ggtt(vma->vm)) if (i915_is_ggtt(vma->vm))
obj->map_and_fenceable = true; obj->map_and_fenceable = false;
drm_mm_remove_node(&vma->node); drm_mm_remove_node(&vma->node);
i915_gem_vma_destroy(vma); i915_gem_vma_destroy(vma);
...@@ -3282,6 +3281,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) ...@@ -3282,6 +3281,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
return 0; return 0;
} }
} else if (enable) { } else if (enable) {
if (WARN_ON(!obj->map_and_fenceable))
return -EINVAL;
reg = i915_find_fence_reg(dev); reg = i915_find_fence_reg(dev);
if (IS_ERR(reg)) if (IS_ERR(reg))
return PTR_ERR(reg); return PTR_ERR(reg);
...@@ -4331,8 +4333,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, ...@@ -4331,8 +4333,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
obj->fence_reg = I915_FENCE_REG_NONE; obj->fence_reg = I915_FENCE_REG_NONE;
obj->madv = I915_MADV_WILLNEED; obj->madv = I915_MADV_WILLNEED;
/* Avoid an unnecessary call to unbind on the first bind. */
obj->map_and_fenceable = true;
i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
} }
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#define __EXEC_OBJECT_HAS_PIN (1<<31) #define __EXEC_OBJECT_HAS_PIN (1<<31)
#define __EXEC_OBJECT_HAS_FENCE (1<<30) #define __EXEC_OBJECT_HAS_FENCE (1<<30)
#define __EXEC_OBJECT_NEEDS_MAP (1<<29)
#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) #define __EXEC_OBJECT_NEEDS_BIAS (1<<28)
#define BATCH_OFFSET_BIAS (256*1024) #define BATCH_OFFSET_BIAS (256*1024)
...@@ -534,14 +535,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb) ...@@ -534,14 +535,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
return ret; return ret;
} }
static int
need_reloc_mappable(struct i915_vma *vma)
{
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
i915_is_ggtt(vma->vm);
}
static int static int
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct intel_engine_cs *ring, struct intel_engine_cs *ring,
...@@ -550,19 +543,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, ...@@ -550,19 +543,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_object *obj = vma->obj;
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
bool need_fence;
uint64_t flags; uint64_t flags;
int ret; int ret;
flags = 0; flags = 0;
if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
need_fence =
has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
if (need_fence || need_reloc_mappable(vma))
flags |= PIN_MAPPABLE; flags |= PIN_MAPPABLE;
if (entry->flags & EXEC_OBJECT_NEEDS_GTT) if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
flags |= PIN_GLOBAL; flags |= PIN_GLOBAL;
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
...@@ -601,26 +587,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, ...@@ -601,26 +587,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
} }
static bool static bool
eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access) need_reloc_mappable(struct i915_vma *vma)
{ {
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
struct drm_i915_gem_object *obj = vma->obj;
bool need_fence, need_mappable;
need_fence = if (entry->relocation_count == 0)
has_fenced_gpu_access && return false;
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE; if (!i915_is_ggtt(vma->vm))
need_mappable = need_fence || need_reloc_mappable(vma); return false;
/* See also use_cpu_reloc() */
if (HAS_LLC(vma->obj->base.dev))
return false;
if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
return false;
WARN_ON((need_mappable || need_fence) && return true;
}
static bool
eb_vma_misplaced(struct i915_vma *vma)
{
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
struct drm_i915_gem_object *obj = vma->obj;
WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
!i915_is_ggtt(vma->vm)); !i915_is_ggtt(vma->vm));
if (entry->alignment && if (entry->alignment &&
vma->node.start & (entry->alignment - 1)) vma->node.start & (entry->alignment - 1))
return true; return true;
if (need_mappable && !obj->map_and_fenceable) if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
return true; return true;
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
...@@ -664,9 +664,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, ...@@ -664,9 +664,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
obj->tiling_mode != I915_TILING_NONE; obj->tiling_mode != I915_TILING_NONE;
need_mappable = need_fence || need_reloc_mappable(vma); need_mappable = need_fence || need_reloc_mappable(vma);
if (need_mappable) if (need_mappable) {
entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
list_move(&vma->exec_list, &ordered_vmas); list_move(&vma->exec_list, &ordered_vmas);
else } else
list_move_tail(&vma->exec_list, &ordered_vmas); list_move_tail(&vma->exec_list, &ordered_vmas);
obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND; obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
...@@ -696,7 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, ...@@ -696,7 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
if (!drm_mm_node_allocated(&vma->node)) if (!drm_mm_node_allocated(&vma->node))
continue; continue;
if (eb_vma_misplaced(vma, has_fenced_gpu_access)) if (eb_vma_misplaced(vma))
ret = i915_vma_unbind(vma); ret = i915_vma_unbind(vma);
else else
ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs); ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment