Commit e3793468 authored by Chris Wilson

drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex

On Braswell and Broxton (also known as Cherryview and Apollolake), we
need to serialise updates of the GGTT using the big stop_machine()
hammer. This has the side effect of appearing to lockdep as a possible
reclaim (since it uses the cpuhp mutex and that is tainted by per-cpu
allocations). However, we want to use vm->mutex (including ggtt->mutex)
from within the shrinker and so must avoid such possible taints. For this
purpose, we introduced the asynchronous vma binding and we can apply it
to the PIN_GLOBAL so long as take care to add the necessary waits for
the worker afterwards.

Closes: https://gitlab.freedesktop.org/drm/intel/issues/211
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200130181710.2030251-3-chris@chris-wilson.co.uk
parent e986209c
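
For reference, the shape the patch converges on is "pin, then explicitly wait for the asynchronous GGTT bind before relying on the PTEs". Below is a minimal sketch of that pattern, simplified from the i915_ggtt_pin() change in the diff (the helper name pin_and_wait is illustrative only, and the -ENOSPC retry loop of the real function is trimmed):

	/* Sketch only: condensed from the i915_ggtt_pin() change in this patch. */
	static int pin_and_wait(struct i915_vma *vma, u32 align, unsigned int flags)
	{
		int err;

		/* Queue the bind; on BXT/CHV the PTE update may run in the async worker. */
		err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
		if (err)
			return err;

		/* Wait for the bind worker so the mapping is ready before we return. */
		err = i915_vma_wait_for_bind(vma);
		if (err)
			i915_vma_unpin(vma);

		return err;
	}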
@@ -527,7 +527,6 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
 {
 	unsigned int flags;
 
-	flags = PIN_GLOBAL;
 	if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
 		/*
 		 * On g33, we cannot place HWS above 256MiB, so
@@ -540,11 +539,11 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
 		 * above the mappable region (even though we never
 		 * actually map it).
 		 */
-		flags |= PIN_MAPPABLE;
+		flags = PIN_MAPPABLE;
 	else
-		flags |= PIN_HIGH;
+		flags = PIN_HIGH;
 
-	return i915_vma_pin(vma, 0, 0, flags);
+	return i915_ggtt_pin(vma, 0, flags);
 }
 
 static int init_status_page(struct intel_engine_cs *engine)
...
@@ -106,6 +106,11 @@ static bool needs_idle_maps(struct drm_i915_private *i915)
 void i915_ggtt_suspend(struct i915_ggtt *ggtt)
 {
+	struct i915_vma *vma;
+
+	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
+		i915_vma_wait_for_bind(vma);
+
 	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
 	ggtt->invalidate(ggtt);
@@ -841,6 +846,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	    IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
 		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
 		ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
+		ggtt->vm.bind_async_flags = I915_VMA_GLOBAL_BIND;
 	}
 
 	ggtt->invalidate = gen8_ggtt_invalidate;
...
@@ -344,7 +344,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
 		goto err_unref;
 	}
 
-	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	ret = i915_ggtt_pin(vma, 0, PIN_HIGH);
 	if (ret)
 		goto err_unref;
...
@@ -3255,7 +3255,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
 		goto err;
 	}
 
-	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	err = i915_ggtt_pin(vma, 0, PIN_HIGH);
 	if (err)
 		goto err;
...
@@ -31,17 +31,15 @@ int intel_ring_pin(struct intel_ring *ring)
 	if (atomic_fetch_inc(&ring->pin_count))
 		return 0;
 
-	flags = PIN_GLOBAL;
-
 	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
-	flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+	flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
 
 	if (vma->obj->stolen)
 		flags |= PIN_MAPPABLE;
 	else
 		flags |= PIN_HIGH;
 
-	ret = i915_vma_pin(vma, 0, 0, flags);
+	ret = i915_ggtt_pin(vma, 0, flags);
 	if (unlikely(ret))
 		goto err_unpin;
...
@@ -308,7 +308,7 @@ int intel_timeline_pin(struct intel_timeline *tl)
 	if (atomic_add_unless(&tl->pin_count, 1, 0))
 		return 0;
 
-	err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
 	if (err)
 		return err;
@@ -431,7 +431,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
 		goto err_rollback;
 	}
 
-	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	err = i915_ggtt_pin(vma, 0, PIN_HIGH);
 	if (err) {
 		__idle_hwsp_free(vma->private, cacheline);
 		goto err_rollback;
...
@@ -678,8 +678,8 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
 	if (IS_ERR(vma))
 		goto err;
 
-	flags = PIN_GLOBAL | PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
-	ret = i915_vma_pin(vma, 0, 0, flags);
+	flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+	ret = i915_ggtt_pin(vma, 0, flags);
 	if (ret) {
 		vma = ERR_PTR(ret);
 		goto err;
...
@@ -390,13 +390,19 @@ int i915_active_ref(struct i915_active *ref,
 	return err;
 }
 
-void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
+struct dma_fence *
+i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
 {
+	struct dma_fence *prev;
+
 	/* We expect the caller to manage the exclusive timeline ordering */
 	GEM_BUG_ON(i915_active_is_idle(ref));
 
-	if (!__i915_active_fence_set(&ref->excl, f))
+	prev = __i915_active_fence_set(&ref->excl, f);
+	if (!prev)
 		atomic_inc(&ref->count);
+
+	return prev;
 }
 
 bool i915_active_acquire_if_busy(struct i915_active *ref)
...
@@ -173,7 +173,8 @@ i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
 	return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence);
 }
 
-void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
+struct dma_fence *
+i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
 
 static inline bool i915_active_has_exclusive(struct i915_active *ref)
 {
...
@@ -1009,6 +1009,12 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ERR_PTR(ret);
 
+	ret = i915_vma_wait_for_bind(vma);
+	if (ret) {
+		i915_vma_unpin(vma);
+		return ERR_PTR(ret);
+	}
+
 	return vma;
 }
...
@@ -294,6 +294,7 @@ struct i915_vma_work {
 	struct dma_fence_work base;
 	struct i915_vma *vma;
 	struct drm_i915_gem_object *pinned;
+	struct i915_sw_dma_fence_cb cb;
 	enum i915_cache_level cache_level;
 	unsigned int flags;
 };
@@ -339,6 +340,25 @@ struct i915_vma_work *i915_vma_work(void)
 	return vw;
 }
 
+int i915_vma_wait_for_bind(struct i915_vma *vma)
+{
+	int err = 0;
+
+	if (rcu_access_pointer(vma->active.excl.fence)) {
+		struct dma_fence *fence;
+
+		rcu_read_lock();
+		fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
+		rcu_read_unlock();
+		if (fence) {
+			err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
+			dma_fence_put(fence);
+		}
+	}
+
+	return err;
+}
+
 /**
  * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
  * @vma: VMA to map
@@ -386,6 +406,8 @@ int i915_vma_bind(struct i915_vma *vma,
 	trace_i915_vma_bind(vma, bind_flags);
 
 	if (work && (bind_flags & ~vma_flags) & vma->vm->bind_async_flags) {
+		struct dma_fence *prev;
+
 		work->vma = vma;
 		work->cache_level = cache_level;
 		work->flags = bind_flags | I915_VMA_ALLOC;
@@ -399,8 +421,12 @@ int i915_vma_bind(struct i915_vma *vma,
 		 * part of the obj->resv->excl_fence as it only affects
 		 * execution and not content or object's backing store lifetime.
 		 */
-		GEM_BUG_ON(i915_active_has_exclusive(&vma->active));
-		i915_active_set_exclusive(&vma->active, &work->base.dma);
+		prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
+		if (prev)
+			__i915_sw_fence_await_dma_fence(&work->base.chain,
+							prev,
+							&work->cb);
+
 		work->base.dma.error = 0; /* enable the queue_work() */
 
 		if (vma->obj) {
@@ -408,7 +434,6 @@ int i915_vma_bind(struct i915_vma *vma,
 			work->pinned = vma->obj;
 		}
 	} else {
-		GEM_BUG_ON((bind_flags & ~vma_flags) & vma->vm->bind_async_flags);
 		ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
 		if (ret)
 			return ret;
@@ -977,8 +1002,14 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
 	do {
 		err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
-		if (err != -ENOSPC)
+		if (err != -ENOSPC) {
+			if (!err) {
+				err = i915_vma_wait_for_bind(vma);
+				if (err)
+					i915_vma_unpin(vma);
+			}
 			return err;
+		}
 
 		/* Unlike i915_vma_pin, we don't take no for an answer! */
 		flush_idle_contexts(vm->gt);
...
@@ -375,6 +375,8 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
 void i915_vma_make_shrinkable(struct i915_vma *vma);
 void i915_vma_make_purgeable(struct i915_vma *vma);
 
+int i915_vma_wait_for_bind(struct i915_vma *vma);
+
 static inline int i915_vma_sync(struct i915_vma *vma)
 {
 	/* Wait for the asynchronous bindings and pending GPU reads */
...