Commit 28176ef4 authored by Chris Wilson

drm/i915: Reserve space in the global seqno during request allocation

A restriction on our global seqnos is that they cannot wrap and that we cannot
use the value 0. This lets us detect when a request has not yet been submitted
(its global seqno is still 0) and ensures that hardware semaphores are
monotonic, as required by older hardware. To meet these restrictions when we
defer assignment of the global seqno, we must check during request
construction that a slot is still available in the global seqno space. If that
check fails, we wait for all outstanding requests to complete and reset the
hardware seqno back to 0.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-33-chris@chris-wilson.co.uk
parent f6168e33
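The reservation scheme introduced in reserve_global_seqno() below comes down to an unsigned wrap test: every outstanding request holds one slot in the 32-bit seqno space, so a new request may be reserved only if adding the count of active requests to the next seqno does not wrap past it. A minimal standalone sketch of that check (the helper name here is illustrative, not part of the driver):

#include <stdbool.h>
#include <stdint.h>

/*
 * Sketch of the seqno-space reservation test used in the patch: u32
 * addition wraps, so if next_seqno + active_requests compares <=
 * next_seqno there is no room left for another request and the seqnos
 * must be reset to 0 (the driver does this via
 * i915_gem_init_global_seqno() after waiting for all outstanding
 * requests to complete).
 */
static bool seqno_space_available(uint32_t next_seqno, uint32_t active_requests)
{
    return next_seqno + active_requests > next_seqno;
}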
@@ -552,7 +552,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
         seq_printf(m, "Flip queued on %s at seqno %x, next seqno %x [current breadcrumb %x], completed? %d\n",
                    engine->name,
                    i915_gem_request_get_seqno(work->flip_queued_req),
-                   dev_priv->gt.global_timeline.next_seqno,
+                   atomic_read(&dev_priv->gt.global_timeline.next_seqno),
                    intel_engine_get_seqno(engine),
                    i915_gem_request_completed(work->flip_queued_req));
     } else
@@ -1046,7 +1046,7 @@ i915_next_seqno_get(void *data, u64 *val)
 {
     struct drm_i915_private *dev_priv = data;
-    *val = READ_ONCE(dev_priv->gt.global_timeline.next_seqno);
+    *val = atomic_read(&dev_priv->gt.global_timeline.next_seqno);
     return 0;
 }
@@ -2277,8 +2277,8 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
     struct drm_file *file;
     seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled);
-    seq_printf(m, "GPU busy? %s [%x]\n",
-               yesno(dev_priv->gt.awake), dev_priv->gt.active_engines);
+    seq_printf(m, "GPU busy? %s [%d requests]\n",
+               yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
     seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
     seq_printf(m, "Frequency requested %d\n",
                intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq));
@@ -2313,7 +2313,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
     if (INTEL_GEN(dev_priv) >= 6 &&
         dev_priv->rps.enabled &&
-        dev_priv->gt.active_engines) {
+        dev_priv->gt.active_requests) {
         u32 rpup, rpupei;
         u32 rpdown, rpdownei;
...
@@ -2092,6 +2092,7 @@ struct drm_i915_private {
         struct list_head timelines;
         struct i915_gem_timeline global_timeline;
+        u32 active_requests;
         /**
          * Is the GPU currently considered idle, or busy executing
@@ -2100,7 +2101,6 @@ struct drm_i915_private {
          * In order to reduce the effect on performance, there
          * is a slight delay before we do so.
          */
-        unsigned int active_engines;
         bool awake;
         /**
...
@@ -2688,8 +2688,6 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
         memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
         spin_unlock(&engine->execlist_lock);
     }
-    engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
 }
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
@@ -2746,7 +2744,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
     if (!READ_ONCE(dev_priv->gt.awake))
         return;
-    if (READ_ONCE(dev_priv->gt.active_engines))
+    if (READ_ONCE(dev_priv->gt.active_requests))
         return;
     rearm_hangcheck =
@@ -2760,7 +2758,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
         goto out_rearm;
     }
-    if (dev_priv->gt.active_engines)
+    if (dev_priv->gt.active_requests)
         goto out_unlock;
     for_each_engine(engine, dev_priv, id)
@@ -4399,6 +4397,7 @@ int i915_gem_suspend(struct drm_device *dev)
         goto err;
     i915_gem_retire_requests(dev_priv);
+    GEM_BUG_ON(dev_priv->gt.active_requests);
     assert_kernel_context_is_current(dev_priv);
     i915_gem_context_lost(dev_priv);
...
@@ -159,6 +159,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
      */
     list_del(&request->ring_link);
     request->ring->last_retired_head = request->postfix;
+    request->i915->gt.active_requests--;
     /* Walk through the active list, calling retire on each. This allows
      * objects to track their GPU activity and mark themselves as idle
@@ -253,13 +254,15 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
         return ret;
     i915_gem_retire_requests(i915);
+    GEM_BUG_ON(i915->gt.active_requests > 1);
     /* If the seqno wraps around, we need to clear the breadcrumb rbtree */
-    if (!i915_seqno_passed(seqno, timeline->next_seqno)) {
+    if (!i915_seqno_passed(seqno, atomic_read(&timeline->next_seqno))) {
         while (intel_kick_waiters(i915) || intel_kick_signalers(i915))
             yield();
         yield();
     }
+    atomic_set(&timeline->next_seqno, seqno);
     /* Finally reset hw state */
     for_each_engine(engine, i915, id)
@@ -279,7 +282,6 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
 int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
 {
     struct drm_i915_private *dev_priv = to_i915(dev);
-    int ret;
     lockdep_assert_held(&dev_priv->drm.struct_mutex);
@@ -289,34 +291,33 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
     /* HWS page needs to be set less than what we
      * will inject to ring
      */
-    ret = i915_gem_init_global_seqno(dev_priv, seqno - 1);
-    if (ret)
-        return ret;
-    dev_priv->gt.global_timeline.next_seqno = seqno;
-    return 0;
+    return i915_gem_init_global_seqno(dev_priv, seqno - 1);
 }
-static int i915_gem_get_global_seqno(struct drm_i915_private *dev_priv,
-                                     u32 *seqno)
+static int reserve_global_seqno(struct drm_i915_private *i915)
 {
-    struct i915_gem_timeline *tl = &dev_priv->gt.global_timeline;
-    /* reserve 0 for non-seqno */
-    if (unlikely(tl->next_seqno == 0)) {
-        int ret;
-        ret = i915_gem_init_global_seqno(dev_priv, 0);
-        if (ret)
-            return ret;
-        tl->next_seqno = 1;
+    u32 active_requests = ++i915->gt.active_requests;
+    u32 next_seqno = atomic_read(&i915->gt.global_timeline.next_seqno);
+    int ret;
+    /* Reservation is fine until we need to wrap around */
+    if (likely(next_seqno + active_requests > next_seqno))
+        return 0;
+    ret = i915_gem_init_global_seqno(i915, 0);
+    if (ret) {
+        i915->gt.active_requests--;
+        return ret;
     }
-    *seqno = tl->next_seqno++;
     return 0;
 }
+static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
+{
+    return atomic_inc_return(&tl->next_seqno);
+}
 static int __i915_sw_fence_call
 submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 {
@@ -356,9 +357,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 {
     struct drm_i915_private *dev_priv = engine->i915;
     struct drm_i915_gem_request *req;
-    u32 seqno;
     int ret;
+    lockdep_assert_held(&dev_priv->drm.struct_mutex);
     /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
      * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
      * and restart.
@@ -367,6 +369,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
     if (ret)
         return ERR_PTR(ret);
+    ret = reserve_global_seqno(dev_priv);
+    if (ret)
+        return ERR_PTR(ret);
     /* Move the oldest request to the slab-cache (if not in use!) */
     req = list_first_entry_or_null(&engine->timeline->requests,
                                    typeof(*req), link);
@@ -402,12 +408,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
      * Do not use kmem_cache_zalloc() here!
      */
     req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
-    if (!req)
-        return ERR_PTR(-ENOMEM);
-    ret = i915_gem_get_global_seqno(dev_priv, &seqno);
-    if (ret)
-        goto err;
+    if (!req) {
+        ret = -ENOMEM;
+        goto err_unreserve;
+    }
     req->timeline = engine->timeline;
@@ -416,14 +420,14 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
                    &i915_fence_ops,
                    &req->lock,
                    req->timeline->fence_context,
-                   seqno);
+                   timeline_get_seqno(req->timeline->common));
     i915_sw_fence_init(&req->submit, submit_notify);
     INIT_LIST_HEAD(&req->active_list);
     req->i915 = dev_priv;
     req->engine = engine;
-    req->global_seqno = seqno;
+    req->global_seqno = req->fence.seqno;
     req->ctx = i915_gem_context_get(ctx);
     /* No zalloc, must clear what we need by hand */
@@ -459,8 +463,9 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 err_ctx:
     i915_gem_context_put(ctx);
-err:
     kmem_cache_free(dev_priv->requests, req);
+err_unreserve:
+    dev_priv->gt.active_requests--;
     return ERR_PTR(ret);
 }
@@ -624,7 +629,6 @@ static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
 {
     struct drm_i915_private *dev_priv = engine->i915;
-    dev_priv->gt.active_engines |= intel_engine_flag(engine);
     if (dev_priv->gt.awake)
         return;
@@ -700,6 +704,9 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
         i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
                                      &request->submitq);
+    GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
+                                 request->fence.seqno));
     request->emitted_jiffies = jiffies;
     request->previous_seqno = timeline->last_pending_seqno;
     timeline->last_pending_seqno = request->fence.seqno;
@@ -962,38 +969,35 @@ long i915_wait_request(struct drm_i915_gem_request *req,
     return timeout;
 }
-static bool engine_retire_requests(struct intel_engine_cs *engine)
+static void engine_retire_requests(struct intel_engine_cs *engine)
 {
     struct drm_i915_gem_request *request, *next;
     list_for_each_entry_safe(request, next,
                              &engine->timeline->requests, link) {
         if (!i915_gem_request_completed(request))
-            return false;
+            return;
         i915_gem_request_retire(request);
     }
-    return true;
 }
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
 {
     struct intel_engine_cs *engine;
-    unsigned int tmp;
+    enum intel_engine_id id;
     lockdep_assert_held(&dev_priv->drm.struct_mutex);
-    if (dev_priv->gt.active_engines == 0)
+    if (!dev_priv->gt.active_requests)
         return;
     GEM_BUG_ON(!dev_priv->gt.awake);
-    for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines, tmp)
-        if (engine_retire_requests(engine))
-            dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
+    for_each_engine(engine, dev_priv, id)
+        engine_retire_requests(engine);
-    if (dev_priv->gt.active_engines == 0)
+    if (!dev_priv->gt.active_requests)
         queue_delayed_work(dev_priv->wq,
                            &dev_priv->gt.idle_work,
                            msecs_to_jiffies(100));
...
@@ -55,7 +55,7 @@ struct intel_timeline {
 struct i915_gem_timeline {
     struct list_head link;
-    u32 next_seqno;
+    atomic_t next_seqno;
     struct drm_i915_private *i915;
     const char *name;
...