Commit e6ba9992 authored by Chris Wilson

drm/i915: Differentiate between sw write location into ring and last hw read

We need to keep track of the last location we ask the hw to read up to
(RING_TAIL) separately from our last write location into the ring, so
that in the event of a GPU reset we do not tell the HW to proceed into
a partially written request (which can happen if that request is waiting
for an external signal before being executed).

v2: Refactor intel_ring_reset() (Mika)

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100144
Testcase: igt/gem_exec_fence/await-hang
Fixes: 821ed7df ("drm/i915: Update reset path to fix incomplete requests")
Fixes: d55ac5bf ("drm/i915: Defer transfer onto execution timeline to actual hw submission")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170425130049.26147-1-chris@chris-wilson.co.uk
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
parent 6b764a59
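
For orientation before the diff, here is a minimal standalone C sketch of the bookkeeping this patch introduces: software writes advance an `emit` offset, submission publishes it as the hardware `tail`, and a reset rewinds to the last submitted tail. The `toy_*` names are hypothetical, for illustration only; the real code is `struct intel_ring` and the helpers shown in the hunks below.

/*
 * Minimal sketch of the emit/tail split (illustrative only; not the
 * driver API).
 */
#include <assert.h>
#include <stdint.h>
#include <string.h>

struct toy_ring {
	uint8_t *vaddr;		/* CPU mapping of the ring buffer */
	uint32_t size;		/* ring size, in bytes */
	uint32_t head;		/* last position consumed by the HW */
	uint32_t tail;		/* last position handed to the HW (RING_TAIL) */
	uint32_t emit;		/* last position written by software */
};

/* Software writes advance only ring->emit; the HW does not see them yet. */
static void toy_emit(struct toy_ring *ring, const void *data, uint32_t len)
{
	assert(len <= ring->size - ring->emit);	/* wrap handling elided */
	memcpy(ring->vaddr + ring->emit, data, len);
	ring->emit += len;
}

/*
 * Only submission publishes emit as the new HW tail. A request that is
 * still waiting on an external fence has been emitted but not submitted,
 * so ring->tail still points before it.
 */
static uint32_t toy_submit(struct toy_ring *ring)
{
	ring->tail = ring->emit;	/* what would be written to RING_TAIL */
	return ring->tail;
}

/*
 * After a GPU reset, rewind to the last submitted tail: partially written
 * but never-submitted requests beyond ring->tail are discarded rather
 * than handed to the HW.
 */
static void toy_reset(struct toy_ring *ring)
{
	ring->emit = ring->tail;
	ring->head = ring->tail;
}
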
@@ -283,10 +283,18 @@ static void advance_ring(struct drm_i915_gem_request *request)
 	 * Note this requires that we are always called in request
 	 * completion order.
 	 */
-	if (list_is_last(&request->ring_link, &request->ring->request_list))
-		tail = request->ring->tail;
-	else
+	if (list_is_last(&request->ring_link, &request->ring->request_list)) {
+		/* We may race here with execlists resubmitting this request
+		 * as we retire it. The resubmission will move the ring->tail
+		 * forwards (to request->wa_tail). We either read the
+		 * current value that was written to hw, or the value that
+		 * is just about to be. Either works, if we miss the last two
+		 * noops - they are safe to be replayed on a reset.
+		 */
+		tail = READ_ONCE(request->ring->tail);
+	} else {
 		tail = request->postfix;
+	}
 	list_del(&request->ring_link);

 	request->ring->head = tail;

@@ -651,7 +659,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * GPU processing the request, we never over-estimate the
 	 * position of the head.
 	 */
-	req->head = req->ring->tail;
+	req->head = req->ring->emit;

 	/* Check that we didn't interrupt ourselves with a new request */
 	GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);

@@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
 	GEM_BUG_ON(freespace < wqi_size);

 	/* The GuC firmware wants the tail index in QWords, not bytes */
-	tail = rq->tail;
-	assert_ring_tail_valid(rq->ring, rq->tail);
-	tail >>= 3;
+	tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
 	GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);

 	/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we

@@ -326,8 +326,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 		rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;

-	assert_ring_tail_valid(rq->ring, rq->tail);
-	reg_state[CTX_RING_TAIL+1] = rq->tail;
+	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);

 	/* True 32b PPGTT with dynamic page allocation: update PDP
 	 * registers and point the unallocated PDPs to scratch page.

@@ -2057,8 +2056,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 			ce->state->obj->mm.dirty = true;
 			i915_gem_object_unpin_map(ce->state->obj);

-			ce->ring->head = ce->ring->tail = 0;
-			intel_ring_update_space(ce->ring);
+			intel_ring_reset(ce->ring, 0);
 		}
 	}
 }

@@ -49,7 +49,7 @@ static int __intel_ring_space(int head, int tail, int size)
 void intel_ring_update_space(struct intel_ring *ring)
 {
-	ring->space = __intel_ring_space(ring->head, ring->tail, ring->size);
+	ring->space = __intel_ring_space(ring->head, ring->emit, ring->size);
 }

 static int

@@ -774,8 +774,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
 	i915_gem_request_submit(request);

-	assert_ring_tail_valid(request->ring, request->tail);
-	I915_WRITE_TAIL(request->engine, request->tail);
+	I915_WRITE_TAIL(request->engine,
+			intel_ring_set_tail(request->ring, request->tail));
 }

 static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)

@@ -1319,11 +1319,23 @@ int intel_ring_pin(struct intel_ring *ring,
 	return PTR_ERR(addr);
 }

+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+	GEM_BUG_ON(!list_empty(&ring->request_list));
+	ring->tail = tail;
+	ring->head = tail;
+	ring->emit = tail;
+	intel_ring_update_space(ring);
+}
+
 void intel_ring_unpin(struct intel_ring *ring)
 {
 	GEM_BUG_ON(!ring->vma);
 	GEM_BUG_ON(!ring->vaddr);

+	/* Discard any unused bytes beyond that submitted to hw. */
+	intel_ring_reset(ring, ring->tail);
+
 	if (i915_vma_is_map_and_fenceable(ring->vma))
 		i915_vma_unpin_iomap(ring->vma);
 	else

@@ -1555,8 +1567,9 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;

+	/* Restart from the beginning of the rings for convenience */
 	for_each_engine(engine, dev_priv, id)
-		engine->buffer->head = engine->buffer->tail;
+		intel_ring_reset(engine->buffer, 0);
 }

 static int ring_request_alloc(struct drm_i915_gem_request *request)

@@ -1609,7 +1622,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 		unsigned space;

 		/* Would completion of this request free enough space? */
-		space = __intel_ring_space(target->postfix, ring->tail,
+		space = __intel_ring_space(target->postfix, ring->emit,
 					   ring->size);
 		if (space >= bytes)
 			break;

@@ -1634,8 +1647,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 {
 	struct intel_ring *ring = req->ring;
-	int remain_actual = ring->size - ring->tail;
-	int remain_usable = ring->effective_size - ring->tail;
+	int remain_actual = ring->size - ring->emit;
+	int remain_usable = ring->effective_size - ring->emit;
 	int bytes = num_dwords * sizeof(u32);
 	int total_bytes, wait_bytes;
 	bool need_wrap = false;

@@ -1671,17 +1684,17 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 	if (unlikely(need_wrap)) {
 		GEM_BUG_ON(remain_actual > ring->space);
-		GEM_BUG_ON(ring->tail + remain_actual > ring->size);
+		GEM_BUG_ON(ring->emit + remain_actual > ring->size);

 		/* Fill the tail with MI_NOOP */
-		memset(ring->vaddr + ring->tail, 0, remain_actual);
-		ring->tail = 0;
+		memset(ring->vaddr + ring->emit, 0, remain_actual);
+		ring->emit = 0;
 		ring->space -= remain_actual;
 	}

-	GEM_BUG_ON(ring->tail > ring->size - bytes);
-	cs = ring->vaddr + ring->tail;
-	ring->tail += bytes;
+	GEM_BUG_ON(ring->emit > ring->size - bytes);
+	cs = ring->vaddr + ring->emit;
+	ring->emit += bytes;
 	ring->space -= bytes;
 	GEM_BUG_ON(ring->space < 0);

@@ -1692,7 +1705,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
 {
 	int num_dwords =
-		(req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
+		(req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
 	u32 *cs;

 	if (num_dwords == 0)

@@ -143,6 +143,7 @@ struct intel_ring {
 	u32 head;
 	u32 tail;
+	u32 emit;

 	int space;
 	int size;

@@ -494,6 +495,8 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size);
 int intel_ring_pin(struct intel_ring *ring,
 		   struct drm_i915_private *i915,
 		   unsigned int offset_bias);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+void intel_ring_update_space(struct intel_ring *ring);
 void intel_ring_unpin(struct intel_ring *ring);
 void intel_ring_free(struct intel_ring *ring);

@@ -517,7 +520,7 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
 	 * reserved for the command packet (i.e. the value passed to
 	 * intel_ring_begin()).
 	 */
-	GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs);
+	GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
 }

 static inline u32

@@ -546,7 +549,19 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
 	GEM_BUG_ON(tail >= ring->size);
 }

-void intel_ring_update_space(struct intel_ring *ring);
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+	/* Whilst writes to the tail are strictly ordered, there is no
+	 * serialisation between readers and the writers. The tail may be
+	 * read by i915_gem_request_retire() just as it is being updated
+	 * by execlists, as although the breadcrumb is complete, the context
+	 * switch hasn't been seen.
+	 */
+	assert_ring_tail_valid(ring, tail);
+	ring->tail = tail;
+	return tail;
+}

 void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);