Commit 1f7fd484 authored by Chris Wilson

drm/i915: Replace i915_vma_put_fence()

Avoid calling i915_vma_put_fence() by using our alternate paths that
bind a secondary vma, avoiding the original fenced vma. For the few
instances where we need to release the fence (i.e. on binding when the
GGTT range becomes invalid), replace the put_fence with a revoke_fence.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190822061557.18402-1-chris@chris-wilson.co.uk
parent b7d151ba
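
The net effect on callers is a new locking contract: instead of i915_vma_put_fence() taking the GGTT vm mutex itself, the caller now serialises the fence update and i915_vma_revoke_fence() only asserts the lock and refuses to touch a pinned fence register. A minimal sketch of that convention, assembled from the i915_vma_unbind() and i915_vma_revoke_fence() hunks below (the wrapper name is illustrative, not part of the patch):

/*
 * Sketch only: drop a vma's fence register under the new contract.
 * i915_vma_revoke_fence() checks the lock via lockdep_assert_held()
 * and returns -EBUSY while the fence register is still pinned.
 */
static int example_revoke_fence_locked(struct i915_vma *vma)
{
	int err;

	mutex_lock(&vma->vm->mutex);
	err = i915_vma_revoke_fence(vma);
	mutex_unlock(&vma->vm->mutex);

	return err;
}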
@@ -770,10 +770,6 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	}
 
 	intel_frontbuffer_flush(new_bo->frontbuffer, ORIGIN_DIRTYFB);
 
-	ret = i915_vma_put_fence(vma);
-	if (ret)
-		goto out_unpin;
-
 	if (!overlay->active) {
 		u32 oconfig;
@@ -221,6 +221,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	 * state and so involves less work.
 	 */
 	if (atomic_read(&obj->bind_count)) {
+		struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
 		/* Before we change the PTE, the GPU must not be accessing it.
 		 * If we wait upon the object, we know that all the bound
 		 * VMA are no longer active.
@@ -232,18 +234,30 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		if (ret)
 			return ret;
 
-		if (!HAS_LLC(to_i915(obj->base.dev)) &&
-		    cache_level != I915_CACHE_NONE) {
-			/* Access to snoopable pages through the GTT is
+		if (!HAS_LLC(i915) && cache_level != I915_CACHE_NONE) {
+			intel_wakeref_t wakeref =
+				intel_runtime_pm_get(&i915->runtime_pm);
+
+			/*
+			 * Access to snoopable pages through the GTT is
 			 * incoherent and on some machines causes a hard
 			 * lockup. Relinquish the CPU mmaping to force
 			 * userspace to refault in the pages and we can
 			 * then double check if the GTT mapping is still
 			 * valid for that pointer access.
 			 */
-			i915_gem_object_release_mmap(obj);
+			ret = mutex_lock_interruptible(&i915->ggtt.vm.mutex);
+			if (ret) {
+				intel_runtime_pm_put(&i915->runtime_pm,
						     wakeref);
+				return ret;
+			}
 
-			/* As we no longer need a fence for GTT access,
+			if (obj->userfault_count)
+				__i915_gem_object_release_mmap(obj);
+
+			/*
+			 * As we no longer need a fence for GTT access,
 			 * we can relinquish it now (and so prevent having
 			 * to steal a fence from someone else on the next
 			 * fence request). Note GPU activity would have
@@ -251,12 +265,17 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 			 * supposed to be linear.
 			 */
 			for_each_ggtt_vma(vma, obj) {
-				ret = i915_vma_put_fence(vma);
+				ret = i915_vma_revoke_fence(vma);
 				if (ret)
-					return ret;
+					break;
 			}
+			mutex_unlock(&i915->ggtt.vm.mutex);
+			intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+			if (ret)
+				return ret;
 		} else {
-			/* We either have incoherent backing store and
+			/*
+			 * We either have incoherent backing store and
 			 * so no GTT access or the architecture is fully
 			 * coherent. In such cases, existing GTT mmaps
 			 * ignore the cache bit in the PTE and we can
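
For readability, the new !HAS_LLC branch above condensed into a single flow (a sketch stitched together from the three hunks, not a verbatim copy of i915_gem_object_set_cache_level()):

/*
 * Condensed sketch of the hunks above: the CPU mmap is only revoked if
 * userspace has actually faulted it in, and the fences are dropped with
 * a runtime-PM wakeref held and under the GGTT vm mutex.
 */
wakeref = intel_runtime_pm_get(&i915->runtime_pm);

ret = mutex_lock_interruptible(&i915->ggtt.vm.mutex);
if (ret) {
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	return ret;
}

if (obj->userfault_count)
	__i915_gem_object_release_mmap(obj);

for_each_ggtt_vma(vma, obj) {
	ret = i915_vma_revoke_fence(vma);
	if (ret)
		break;	/* still unlock and drop the wakeref below */
}

mutex_unlock(&i915->ggtt.vm.mutex);
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
if (ret)
	return ret;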
@@ -1024,6 +1024,9 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int err;
 
+	if (i915_gem_object_is_tiled(obj))
+		return ERR_PTR(-EINVAL);
+
 	if (use_cpu_reloc(cache, obj))
 		return NULL;
 
@@ -1047,12 +1050,6 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 		if (err) /* no inactive aperture space, use cpu reloc */
 			return NULL;
 	} else {
-		err = i915_vma_put_fence(vma);
-		if (err) {
-			i915_vma_unpin(vma);
-			return ERR_PTR(err);
-		}
-
 		cache->node.start = vma->node.start;
 		cache->node.mm = (void *)vma;
 	}
@@ -343,6 +343,8 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		return ret;
 
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+	vma = ERR_PTR(-ENODEV);
+	if (!i915_gem_object_is_tiled(obj))
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 				       PIN_MAPPABLE |
 				       PIN_NONBLOCK /* NOWARN */ |
@@ -350,13 +352,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 	if (!IS_ERR(vma)) {
 		node.start = i915_ggtt_offset(vma);
 		node.allocated = false;
-		ret = i915_vma_put_fence(vma);
-		if (ret) {
-			i915_vma_unpin(vma);
-			vma = ERR_PTR(ret);
-		}
-	}
-	if (IS_ERR(vma)) {
+	} else {
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 		if (ret)
 			goto out_unlock;
@@ -557,6 +553,8 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		wakeref = intel_runtime_pm_get(rpm);
 	}
 
+	vma = ERR_PTR(-ENODEV);
+	if (!i915_gem_object_is_tiled(obj))
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 				       PIN_MAPPABLE |
 				       PIN_NONBLOCK /* NOWARN */ |
@@ -564,13 +562,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 	if (!IS_ERR(vma)) {
 		node.start = i915_ggtt_offset(vma);
 		node.allocated = false;
-		ret = i915_vma_put_fence(vma);
-		if (ret) {
-			i915_vma_unpin(vma);
-			vma = ERR_PTR(ret);
-		}
-	}
-	if (IS_ERR(vma)) {
+	} else {
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 		if (ret)
 			goto out_rpm;
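
Both the GTT pread and pwrite fast paths now take the same shape: a tiled object never pins a fenced GGTT vma, it simply falls through to the page-by-page mappable-node path. A condensed sketch of that shared setup (names taken from the hunks above; the trailing pin flags and the exact error labels are elided here, as they are in the diff):

/* Sketch of the shared GTT pread/pwrite setup: tiled objects skip the
 * aperture pin entirely instead of having their fence revoked.
 */
vma = ERR_PTR(-ENODEV);
if (!i915_gem_object_is_tiled(obj))
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONBLOCK /* NOWARN */ |
				       ...);	/* remaining flags as in the hunks */
if (!IS_ERR(vma)) {
	node.start = i915_ggtt_offset(vma);
	node.allocated = false;
} else {
	ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
	if (ret)
		goto out;	/* out_unlock for pread, out_rpm for pwrite */
}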
@@ -287,7 +287,7 @@ static int fence_update(struct i915_fence_reg *fence,
 }
 
 /**
- * i915_vma_put_fence - force-remove fence for a VMA
+ * i915_vma_revoke_fence - force-remove fence for a VMA
  * @vma: vma to map linearly (not through a fence reg)
  *
  * This function force-removes any fence from the given object, which is useful
@@ -297,26 +297,18 @@ static int fence_update(struct i915_fence_reg *fence,
 *
 * 0 on success, negative error code on failure.
 */
-int i915_vma_put_fence(struct i915_vma *vma)
+int i915_vma_revoke_fence(struct i915_vma *vma)
 {
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
 	struct i915_fence_reg *fence = vma->fence;
-	int err;
 
+	lockdep_assert_held(&vma->vm->mutex);
 	if (!fence)
 		return 0;
 
 	if (atomic_read(&fence->pin_count))
 		return -EBUSY;
 
-	err = mutex_lock_interruptible(&ggtt->vm.mutex);
-	if (err)
-		return err;
-
-	err = fence_update(fence, NULL);
-	mutex_unlock(&ggtt->vm.mutex);
-
-	return err;
+	return fence_update(fence, NULL);
 }
 
 static struct i915_fence_reg *fence_find(struct drm_i915_private *i915)
@@ -982,7 +982,9 @@ int i915_vma_unbind(struct i915_vma *vma)
 		GEM_BUG_ON(i915_vma_has_ggtt_write(vma));
 
 		/* release the fence reg _after_ flushing */
-		ret = i915_vma_put_fence(vma);
+		mutex_lock(&vma->vm->mutex);
+		ret = i915_vma_revoke_fence(vma);
+		mutex_unlock(&vma->vm->mutex);
 		if (ret)
 			return ret;
 
@@ -421,8 +421,8 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma)
 *
 * True if the vma has a fence, false otherwise.
 */
-int i915_vma_pin_fence(struct i915_vma *vma);
-int __must_check i915_vma_put_fence(struct i915_vma *vma);
+int __must_check i915_vma_pin_fence(struct i915_vma *vma);
+int __must_check i915_vma_revoke_fence(struct i915_vma *vma);
 
 static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
 {