Commit 85e2fe67 authored by Michał Winiarski's avatar Michał Winiarski Committed by Chris Wilson

drm/i915/guc: Submit GuC workitems containing coalesced requests

To create an upper bound on number of GuC workitems, we need to change
the way that requests are being submitted. Rather than submitting each
request as an individual workitem, we can do coalescing in a similar way
we're handlig execlist submission ports. We also need to stop pretending
that we're doing "lite-restore" in GuC submission (we would create a
workitem each time we hit this condition). This allows us to completely
remove the reservation, replacing it with a compile time check.

v2: Also coalesce when replaying on reset (Daniele)
v3: Consistent wq_resv - per-request (Daniele)
v4: Squash removing wq_resv
v5: Reflect i915_guc_submit argument changes in doc
v6: Rebase on top of execlists reset/restart fix (Chris,Michał)

References: https://bugs.freedesktop.org/show_bug.cgi?id=101873
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Jeff McGee <jeff.mcgee@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Oscar Mateo <oscar.mateo@intel.com>
Signed-off-by: default avatarMichał Winiarski <michal.winiarski@intel.com>
Reviewed-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20170914083216.10192-2-michal.winiarski@intel.comSigned-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
parent 45ec5bc8
...@@ -2450,8 +2450,6 @@ static void i915_guc_client_info(struct seq_file *m, ...@@ -2450,8 +2450,6 @@ static void i915_guc_client_info(struct seq_file *m,
seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n", seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
client->wq_size, client->wq_offset, client->wq_tail); client->wq_size, client->wq_offset, client->wq_tail);
seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
for_each_engine(engine, dev_priv, id) { for_each_engine(engine, dev_priv, id) {
u64 submissions = client->submissions[id]; u64 submissions = client->submissions[id];
tot += submissions; tot += submissions;
......
...@@ -406,63 +406,6 @@ static void guc_stage_desc_fini(struct intel_guc *guc, ...@@ -406,63 +406,6 @@ static void guc_stage_desc_fini(struct intel_guc *guc,
memset(desc, 0, sizeof(*desc)); memset(desc, 0, sizeof(*desc));
} }
/**
* i915_guc_wq_reserve() - reserve space in the GuC's workqueue
* @request: request associated with the commands
*
* Return: 0 if space is available
* -EAGAIN if space is not currently available
*
* This function must be called (and must return 0) before a request
* is submitted to the GuC via i915_guc_submit() below. Once a result
* of 0 has been returned, it must be balanced by a corresponding
* call to submit().
*
* Reservation allows the caller to determine in advance that space
* will be available for the next submission before committing resources
* to it, and helps avoid late failures with complicated recovery paths.
*/
int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
{
const size_t wqi_size = sizeof(struct guc_wq_item);
struct i915_guc_client *client = request->i915->guc.execbuf_client;
struct guc_process_desc *desc = __get_process_desc(client);
u32 freespace;
int ret;
spin_lock_irq(&client->wq_lock);
freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
freespace -= client->wq_rsvd;
if (likely(freespace >= wqi_size)) {
client->wq_rsvd += wqi_size;
ret = 0;
} else {
client->no_wq_space++;
ret = -EAGAIN;
}
spin_unlock_irq(&client->wq_lock);
return ret;
}
static void guc_client_update_wq_rsvd(struct i915_guc_client *client, int size)
{
unsigned long flags;
spin_lock_irqsave(&client->wq_lock, flags);
client->wq_rsvd += size;
spin_unlock_irqrestore(&client->wq_lock, flags);
}
void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
{
const int wqi_size = sizeof(struct guc_wq_item);
struct i915_guc_client *client = request->i915->guc.execbuf_client;
GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
guc_client_update_wq_rsvd(client, -wqi_size);
}
/* Construct a Work Item and append it to the GuC's Work Queue */ /* Construct a Work Item and append it to the GuC's Work Queue */
static void guc_wq_item_append(struct i915_guc_client *client, static void guc_wq_item_append(struct i915_guc_client *client,
struct drm_i915_gem_request *rq) struct drm_i915_gem_request *rq)
...@@ -476,7 +419,7 @@ static void guc_wq_item_append(struct i915_guc_client *client, ...@@ -476,7 +419,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
struct guc_wq_item *wqi; struct guc_wq_item *wqi;
u32 freespace, tail, wq_off; u32 freespace, tail, wq_off;
/* Free space is guaranteed, see i915_guc_wq_reserve() above */ /* Free space is guaranteed */
freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size); freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
GEM_BUG_ON(freespace < wqi_size); GEM_BUG_ON(freespace < wqi_size);
...@@ -491,14 +434,12 @@ static void guc_wq_item_append(struct i915_guc_client *client, ...@@ -491,14 +434,12 @@ static void guc_wq_item_append(struct i915_guc_client *client,
* workqueue buffer dw by dw. * workqueue buffer dw by dw.
*/ */
BUILD_BUG_ON(wqi_size != 16); BUILD_BUG_ON(wqi_size != 16);
GEM_BUG_ON(client->wq_rsvd < wqi_size);
/* postincrement WQ tail for next time */ /* postincrement WQ tail for next time */
wq_off = client->wq_tail; wq_off = client->wq_tail;
GEM_BUG_ON(wq_off & (wqi_size - 1)); GEM_BUG_ON(wq_off & (wqi_size - 1));
client->wq_tail += wqi_size; client->wq_tail += wqi_size;
client->wq_tail &= client->wq_size - 1; client->wq_tail &= client->wq_size - 1;
client->wq_rsvd -= wqi_size;
/* WQ starts from the page after doorbell / process_desc */ /* WQ starts from the page after doorbell / process_desc */
wqi = client->vaddr + wq_off + GUC_DB_SIZE; wqi = client->vaddr + wq_off + GUC_DB_SIZE;
...@@ -579,47 +520,42 @@ static int guc_ring_doorbell(struct i915_guc_client *client) ...@@ -579,47 +520,42 @@ static int guc_ring_doorbell(struct i915_guc_client *client)
} }
/** /**
* __i915_guc_submit() - Submit commands through GuC * i915_guc_submit() - Submit commands through GuC
* @rq: request associated with the commands * @engine: engine associated with the commands
*
* The caller must have already called i915_guc_wq_reserve() above with
* a result of 0 (success), guaranteeing that there is space in the work
* queue for the new request, so enqueuing the item cannot fail.
*
* Bad Things Will Happen if the caller violates this protocol e.g. calls
* submit() when _reserve() says there's no space, or calls _submit()
* a different number of times from (successful) calls to _reserve().
* *
* The only error here arises if the doorbell hardware isn't functioning * The only error here arises if the doorbell hardware isn't functioning
* as expected, which really shouln't happen. * as expected, which really shouln't happen.
*/ */
static void __i915_guc_submit(struct drm_i915_gem_request *rq) static void i915_guc_submit(struct intel_engine_cs *engine)
{ {
struct drm_i915_private *dev_priv = rq->i915; struct drm_i915_private *dev_priv = engine->i915;
struct intel_engine_cs *engine = rq->engine; struct intel_guc *guc = &dev_priv->guc;
unsigned int engine_id = engine->id;
struct intel_guc *guc = &rq->i915->guc;
struct i915_guc_client *client = guc->execbuf_client; struct i915_guc_client *client = guc->execbuf_client;
unsigned long flags; struct execlist_port *port = engine->execlist_port;
unsigned int engine_id = engine->id;
unsigned int n;
for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
struct drm_i915_gem_request *rq;
unsigned int count;
rq = port_unpack(&port[n], &count);
if (rq && count == 0) {
port_set(&port[n], port_pack(rq, ++count));
/* WA to flush out the pending GMADR writes to ring buffer. */
if (i915_vma_is_map_and_fenceable(rq->ring->vma)) if (i915_vma_is_map_and_fenceable(rq->ring->vma))
POSTING_READ_FW(GUC_STATUS); POSTING_READ_FW(GUC_STATUS);
spin_lock_irqsave(&client->wq_lock, flags); spin_lock(&client->wq_lock);
guc_wq_item_append(client, rq); guc_wq_item_append(client, rq);
WARN_ON(guc_ring_doorbell(client)); WARN_ON(guc_ring_doorbell(client));
client->submissions[engine_id] += 1; client->submissions[engine_id] += 1;
spin_unlock_irqrestore(&client->wq_lock, flags); spin_unlock(&client->wq_lock);
} }
}
static void i915_guc_submit(struct drm_i915_gem_request *rq)
{
__i915_gem_request_submit(rq);
__i915_guc_submit(rq);
} }
static void nested_enable_signaling(struct drm_i915_gem_request *rq) static void nested_enable_signaling(struct drm_i915_gem_request *rq)
...@@ -653,16 +589,19 @@ static void port_assign(struct execlist_port *port, ...@@ -653,16 +589,19 @@ static void port_assign(struct execlist_port *port,
if (port_isset(port)) if (port_isset(port))
i915_gem_request_put(port_request(port)); i915_gem_request_put(port_request(port));
port_set(port, i915_gem_request_get(rq)); port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
nested_enable_signaling(rq); nested_enable_signaling(rq);
} }
static bool i915_guc_dequeue(struct intel_engine_cs *engine) static void i915_guc_dequeue(struct intel_engine_cs *engine)
{ {
struct execlist_port *port = engine->execlist_port; struct execlist_port *port = engine->execlist_port;
struct drm_i915_gem_request *last = port_request(port); struct drm_i915_gem_request *last = NULL;
struct rb_node *rb;
bool submit = false; bool submit = false;
struct rb_node *rb;
if (port_isset(port))
port++;
spin_lock_irq(&engine->timeline->lock); spin_lock_irq(&engine->timeline->lock);
rb = engine->execlist_first; rb = engine->execlist_first;
...@@ -687,7 +626,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine) ...@@ -687,7 +626,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
INIT_LIST_HEAD(&rq->priotree.link); INIT_LIST_HEAD(&rq->priotree.link);
rq->priotree.priority = INT_MAX; rq->priotree.priority = INT_MAX;
i915_guc_submit(rq); __i915_gem_request_submit(rq);
trace_i915_gem_request_in(rq, port_index(port, engine)); trace_i915_gem_request_in(rq, port_index(port, engine));
last = rq; last = rq;
submit = true; submit = true;
...@@ -701,11 +640,11 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine) ...@@ -701,11 +640,11 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
} }
done: done:
engine->execlist_first = rb; engine->execlist_first = rb;
if (submit) if (submit) {
port_assign(port, last); port_assign(port, last);
i915_guc_submit(engine);
}
spin_unlock_irq(&engine->timeline->lock); spin_unlock_irq(&engine->timeline->lock);
return submit;
} }
static void i915_guc_irq_handler(unsigned long data) static void i915_guc_irq_handler(unsigned long data)
...@@ -713,9 +652,7 @@ static void i915_guc_irq_handler(unsigned long data) ...@@ -713,9 +652,7 @@ static void i915_guc_irq_handler(unsigned long data)
struct intel_engine_cs *engine = (struct intel_engine_cs *)data; struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
struct execlist_port *port = engine->execlist_port; struct execlist_port *port = engine->execlist_port;
struct drm_i915_gem_request *rq; struct drm_i915_gem_request *rq;
bool submit;
do {
rq = port_request(&port[0]); rq = port_request(&port[0]);
while (rq && i915_gem_request_completed(rq)) { while (rq && i915_gem_request_completed(rq)) {
trace_i915_gem_request_out(rq); trace_i915_gem_request_out(rq);
...@@ -727,10 +664,8 @@ static void i915_guc_irq_handler(unsigned long data) ...@@ -727,10 +664,8 @@ static void i915_guc_irq_handler(unsigned long data)
rq = port_request(&port[0]); rq = port_request(&port[0]);
} }
submit = false; if (!port_isset(&port[1]))
if (!port_count(&port[1])) i915_guc_dequeue(engine);
submit = i915_guc_dequeue(engine);
} while (submit);
} }
/* /*
...@@ -1239,6 +1174,19 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) ...@@ -1239,6 +1174,19 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
enum intel_engine_id id; enum intel_engine_id id;
int err; int err;
/*
* We're using GuC work items for submitting work through GuC. Since
* we're coalescing multiple requests from a single context into a
* single work item prior to assigning it to execlist_port, we can
* never have more work items than the total number of ports (for all
* engines). The GuC firmware is controlling the HEAD of work queue,
* and it is guaranteed that it will remove the work item from the
* queue before our request is completed.
*/
BUILD_BUG_ON(ARRAY_SIZE(engine->execlist_port) *
sizeof(struct guc_wq_item) *
I915_NUM_ENGINES > GUC_WQ_SIZE);
if (!client) { if (!client) {
client = guc_client_alloc(dev_priv, client = guc_client_alloc(dev_priv,
INTEL_INFO(dev_priv)->ring_mask, INTEL_INFO(dev_priv)->ring_mask,
...@@ -1266,9 +1214,6 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) ...@@ -1266,9 +1214,6 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
guc_interrupts_capture(dev_priv); guc_interrupts_capture(dev_priv);
for_each_engine(engine, dev_priv, id) { for_each_engine(engine, dev_priv, id) {
const int wqi_size = sizeof(struct guc_wq_item);
struct drm_i915_gem_request *rq;
/* The tasklet was initialised by execlists, and may be in /* The tasklet was initialised by execlists, and may be in
* a state of flux (across a reset) and so we just want to * a state of flux (across a reset) and so we just want to
* take over the callback without changing any other state * take over the callback without changing any other state
...@@ -1276,14 +1221,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) ...@@ -1276,14 +1221,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
*/ */
engine->irq_tasklet.func = i915_guc_irq_handler; engine->irq_tasklet.func = i915_guc_irq_handler;
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
tasklet_schedule(&engine->irq_tasklet);
/* Replay the current set of previously submitted requests */
spin_lock_irq(&engine->timeline->lock);
list_for_each_entry(rq, &engine->timeline->requests, link) {
guc_client_update_wq_rsvd(client, wqi_size);
__i915_guc_submit(rq);
}
spin_unlock_irq(&engine->timeline->lock);
} }
return 0; return 0;
......
...@@ -988,27 +988,14 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) ...@@ -988,27 +988,14 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
*/ */
request->reserved_space += EXECLISTS_REQUEST_SIZE; request->reserved_space += EXECLISTS_REQUEST_SIZE;
if (i915.enable_guc_submission) {
/*
* Check that the GuC has space for the request before
* going any further, as the i915_add_request() call
* later on mustn't fail ...
*/
ret = i915_guc_wq_reserve(request);
if (ret)
goto err;
}
cs = intel_ring_begin(request, 0); cs = intel_ring_begin(request, 0);
if (IS_ERR(cs)) { if (IS_ERR(cs))
ret = PTR_ERR(cs); return PTR_ERR(cs);
goto err_unreserve;
}
if (!ce->initialised) { if (!ce->initialised) {
ret = engine->init_context(request); ret = engine->init_context(request);
if (ret) if (ret)
goto err_unreserve; return ret;
ce->initialised = true; ce->initialised = true;
} }
...@@ -1022,12 +1009,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) ...@@ -1022,12 +1009,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
request->reserved_space -= EXECLISTS_REQUEST_SIZE; request->reserved_space -= EXECLISTS_REQUEST_SIZE;
return 0; return 0;
err_unreserve:
if (i915.enable_guc_submission)
i915_guc_wq_unreserve(request);
err:
return ret;
} }
/* /*
......
...@@ -52,13 +52,6 @@ struct drm_i915_gem_request; ...@@ -52,13 +52,6 @@ struct drm_i915_gem_request;
* GuC). The subsequent pages of the client object constitute the work * GuC). The subsequent pages of the client object constitute the work
* queue (a circular array of work items), again described in the process * queue (a circular array of work items), again described in the process
* descriptor. Work queue pages are mapped momentarily as required. * descriptor. Work queue pages are mapped momentarily as required.
*
* We also keep a few statistics on failures. Ideally, these should all
* be zero!
* no_wq_space: times that the submission pre-check found no space was
* available in the work queue (note, the queue is shared,
* not per-engine). It is OK for this to be nonzero, but
* it should not be huge!
*/ */
struct i915_guc_client { struct i915_guc_client {
struct i915_vma *vma; struct i915_vma *vma;
...@@ -79,8 +72,6 @@ struct i915_guc_client { ...@@ -79,8 +72,6 @@ struct i915_guc_client {
uint32_t wq_offset; uint32_t wq_offset;
uint32_t wq_size; uint32_t wq_size;
uint32_t wq_tail; uint32_t wq_tail;
uint32_t wq_rsvd;
uint32_t no_wq_space;
/* Per-engine counts of GuC submissions */ /* Per-engine counts of GuC submissions */
uint64_t submissions[I915_NUM_ENGINES]; uint64_t submissions[I915_NUM_ENGINES];
...@@ -246,8 +237,6 @@ u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); ...@@ -246,8 +237,6 @@ u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv);
/* i915_guc_submission.c */ /* i915_guc_submission.c */
int i915_guc_submission_init(struct drm_i915_private *dev_priv); int i915_guc_submission_init(struct drm_i915_private *dev_priv);
int i915_guc_submission_enable(struct drm_i915_private *dev_priv); int i915_guc_submission_enable(struct drm_i915_private *dev_priv);
int i915_guc_wq_reserve(struct drm_i915_gem_request *rq);
void i915_guc_wq_unreserve(struct drm_i915_gem_request *request);
void i915_guc_submission_disable(struct drm_i915_private *dev_priv); void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
void i915_guc_submission_fini(struct drm_i915_private *dev_priv); void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment