Commit e5dadff4 authored by Chris Wilson

drm/i915: Protect request retirement with timeline->mutex

Forgo the struct_mutex requirement for request retirement as we have
been transitioning over to only using the timeline->mutex for
controlling the lifetime of a request on that timeline.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190815205709.24285-4-chris@chris-wilson.co.uk
parent ccb23d2d
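
To illustrate the pattern this commit moves to, here is a minimal sketch (not code from the patch itself; the helper name and the explicit locking wrapper are illustrative) of retiring completed requests from a timeline's request list under that timeline's mutex instead of under the device-wide struct_mutex:

static void retire_completed_requests(struct intel_timeline *tl)
{
        struct i915_request *rq, *rn;

        /* Requests on tl->requests complete in order, oldest first. */
        mutex_lock(&tl->mutex);
        list_for_each_entry_safe(rq, rn, &tl->requests, link) {
                if (!i915_request_completed(rq))
                        break;

                /* With this patch, i915_request_retire() asserts tl->mutex is held. */
                i915_request_retire(rq);
        }
        mutex_unlock(&tl->mutex);
}

In the patch itself the unlocked loop body corresponds to retire_requests() in i915_request.c, with the callers (i915_retire_requests(), i915_request_add(), the selftests) responsible for taking the timeline mutex.
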
@@ -735,63 +735,6 @@ static int eb_select_context(struct i915_execbuffer *eb)
 	return 0;
 }
 
-static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
-{
-	struct i915_request *rq;
-
-	/*
-	 * Completely unscientific finger-in-the-air estimates for suitable
-	 * maximum user request size (to avoid blocking) and then backoff.
-	 */
-	if (intel_ring_update_space(ring) >= PAGE_SIZE)
-		return NULL;
-
-	/*
-	 * Find a request that after waiting upon, there will be at least half
-	 * the ring available. The hysteresis allows us to compete for the
-	 * shared ring and should mean that we sleep less often prior to
-	 * claiming our resources, but not so long that the ring completely
-	 * drains before we can submit our next request.
-	 */
-	list_for_each_entry(rq, &ring->request_list, ring_link) {
-		if (__intel_ring_space(rq->postfix,
-				       ring->emit, ring->size) > ring->size / 2)
-			break;
-	}
-	if (&rq->ring_link == &ring->request_list)
-		return NULL; /* weird, we will check again later for real */
-
-	return i915_request_get(rq);
-}
-
-static int eb_wait_for_ring(const struct i915_execbuffer *eb)
-{
-	struct i915_request *rq;
-	int ret = 0;
-
-	/*
-	 * Apply a light amount of backpressure to prevent excessive hogs
-	 * from blocking waiting for space whilst holding struct_mutex and
-	 * keeping all of their resources pinned.
-	 */
-
-	rq = __eb_wait_for_ring(eb->context->ring);
-	if (rq) {
-		mutex_unlock(&eb->i915->drm.struct_mutex);
-
-		if (i915_request_wait(rq,
-				      I915_WAIT_INTERRUPTIBLE,
-				      MAX_SCHEDULE_TIMEOUT) < 0)
-			ret = -EINTR;
-
-		i915_request_put(rq);
-
-		mutex_lock(&eb->i915->drm.struct_mutex);
-	}
-
-	return ret;
-}
-
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
 	struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
@@ -2134,10 +2077,75 @@ static const enum intel_engine_id user_ring_map[] = {
 	[I915_EXEC_VEBOX]	= VECS0
 };
 
-static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+static struct i915_request *eb_throttle(struct intel_context *ce)
+{
+	struct intel_ring *ring = ce->ring;
+	struct intel_timeline *tl = ce->timeline;
+	struct i915_request *rq;
+
+	/*
+	 * Completely unscientific finger-in-the-air estimates for suitable
+	 * maximum user request size (to avoid blocking) and then backoff.
+	 */
+	if (intel_ring_update_space(ring) >= PAGE_SIZE)
+		return NULL;
+
+	/*
+	 * Find a request that after waiting upon, there will be at least half
+	 * the ring available. The hysteresis allows us to compete for the
+	 * shared ring and should mean that we sleep less often prior to
+	 * claiming our resources, but not so long that the ring completely
+	 * drains before we can submit our next request.
+	 */
+	list_for_each_entry(rq, &tl->requests, link) {
+		if (rq->ring != ring)
+			continue;
+
+		if (__intel_ring_space(rq->postfix,
+				       ring->emit, ring->size) > ring->size / 2)
+			break;
+	}
+	if (&rq->link == &tl->requests)
+		return NULL; /* weird, we will check again later for real */
+
+	return i915_request_get(rq);
+}
+
+static int
+__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 {
 	int err;
 
+	if (likely(atomic_inc_not_zero(&ce->pin_count)))
+		return 0;
+
+	err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex);
+	if (err)
+		return err;
+
+	err = __intel_context_do_pin(ce);
+	mutex_unlock(&eb->i915->drm.struct_mutex);
+
+	return err;
+}
+
+static void
+__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+	if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
+		return;
+
+	mutex_lock(&eb->i915->drm.struct_mutex);
+	intel_context_unpin(ce);
+	mutex_unlock(&eb->i915->drm.struct_mutex);
+}
+
+static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+	struct intel_timeline *tl;
+	struct i915_request *rq;
+	int err;
+
 	/*
 	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
 	 * EIO if the GPU is already wedged.
@@ -2151,7 +2159,7 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	err = intel_context_pin(ce);
+	err = __eb_pin_context(eb, ce);
 	if (err)
 		return err;
@@ -2163,23 +2171,43 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 	 * until the timeline is idle, which in turn releases the wakeref
 	 * taken on the engine, and the parent device.
 	 */
-	err = intel_context_timeline_lock(ce);
-	if (err)
+	tl = intel_context_timeline_lock(ce);
+	if (IS_ERR(tl)) {
+		err = PTR_ERR(tl);
 		goto err_unpin;
+	}
 
 	intel_context_enter(ce);
-	intel_context_timeline_unlock(ce);
+	rq = eb_throttle(ce);
+
+	intel_context_timeline_unlock(tl);
+
+	if (rq) {
+		if (i915_request_wait(rq,
+				      I915_WAIT_INTERRUPTIBLE,
+				      MAX_SCHEDULE_TIMEOUT) < 0) {
+			i915_request_put(rq);
+			err = -EINTR;
+			goto err_exit;
+		}
+
+		i915_request_put(rq);
+	}
 
 	eb->engine = ce->engine;
 	eb->context = ce;
 	return 0;
 
+err_exit:
+	mutex_lock(&tl->mutex);
+	intel_context_exit(ce);
+	intel_context_timeline_unlock(tl);
 err_unpin:
-	intel_context_unpin(ce);
+	__eb_unpin_context(eb, ce);
 	return err;
 }
 
-static void eb_unpin_context(struct i915_execbuffer *eb)
+static void eb_unpin_engine(struct i915_execbuffer *eb)
 {
 	struct intel_context *ce = eb->context;
 	struct intel_timeline *tl = ce->timeline;
@@ -2188,7 +2216,7 @@ static void eb_unpin_context(struct i915_execbuffer *eb)
 	intel_context_exit(ce);
 	mutex_unlock(&tl->mutex);
 
-	intel_context_unpin(ce);
+	__eb_unpin_context(eb, ce);
 }
 
 static unsigned int
@@ -2233,9 +2261,9 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
 }
 
 static int
-eb_select_engine(struct i915_execbuffer *eb,
-		 struct drm_file *file,
-		 struct drm_i915_gem_execbuffer2 *args)
+eb_pin_engine(struct i915_execbuffer *eb,
+	      struct drm_file *file,
+	      struct drm_i915_gem_execbuffer2 *args)
 {
 	struct intel_context *ce;
 	unsigned int idx;
@@ -2250,7 +2278,7 @@ eb_select_engine(struct i915_execbuffer *eb,
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
-	err = eb_pin_context(eb, ce);
+	err = __eb_pin_engine(eb, ce);
 	intel_context_put(ce);
 
 	return err;
@@ -2468,16 +2496,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
-	err = i915_mutex_lock_interruptible(dev);
-	if (err)
-		goto err_context;
-
-	err = eb_select_engine(&eb, file, args);
+	err = eb_pin_engine(&eb, file, args);
 	if (unlikely(err))
-		goto err_unlock;
+		goto err_context;
 
-	err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
-	if (unlikely(err))
+	err = i915_mutex_lock_interruptible(dev);
+	if (err)
 		goto err_engine;
 
 	err = eb_relocate(&eb);
@@ -2635,10 +2659,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_vma:
 	if (eb.exec)
 		eb_release_vmas(&eb);
-err_engine:
-	eb_unpin_context(&eb);
-err_unlock:
 	mutex_unlock(&dev->struct_mutex);
+err_engine:
+	eb_unpin_engine(&eb);
 err_context:
 	i915_gem_context_put(eb.gem_context);
 err_destroy:
......
@@ -12,6 +12,7 @@
 #include "i915_active.h"
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
+#include "intel_timeline_types.h"
 
 void intel_context_init(struct intel_context *ce,
 			struct i915_gem_context *ctx,
@@ -118,17 +119,24 @@ static inline void intel_context_put(struct intel_context *ce)
 	kref_put(&ce->ref, ce->ops->destroy);
 }
 
-static inline int __must_check
+static inline struct intel_timeline *__must_check
 intel_context_timeline_lock(struct intel_context *ce)
 	__acquires(&ce->timeline->mutex)
 {
-	return mutex_lock_interruptible(&ce->timeline->mutex);
+	struct intel_timeline *tl = ce->timeline;
+	int err;
+
+	err = mutex_lock_interruptible(&tl->mutex);
+	if (err)
+		return ERR_PTR(err);
+
+	return tl;
 }
 
-static inline void intel_context_timeline_unlock(struct intel_context *ce)
-	__releases(&ce->timeline->mutex)
+static inline void intel_context_timeline_unlock(struct intel_timeline *tl)
+	__releases(&tl->mutex)
 {
-	mutex_unlock(&ce->timeline->mutex);
+	mutex_unlock(&tl->mutex);
 }
 
 int intel_context_prepare_remote_request(struct intel_context *ce,
......
@@ -679,7 +679,6 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
 			  engine->status_page.vma))
 		goto out_frame;
 
-	INIT_LIST_HEAD(&frame->ring.request_list);
 	frame->ring.vaddr = frame->cs;
 	frame->ring.size = sizeof(frame->cs);
 	frame->ring.effective_size = frame->ring.size;
......
@@ -69,9 +69,6 @@ struct intel_ring {
 	struct i915_vma *vma;
 	void *vaddr;
 
-	struct list_head request_list;
-	struct list_head active_link;
-
 	/*
 	 * As we have two types of rings, one global to the engine used
 	 * by ringbuffer submission and those that are exclusive to a
......
@@ -15,8 +15,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 	spin_lock_init(&gt->irq_lock);
 
-	INIT_LIST_HEAD(&gt->active_rings);
-
 	INIT_LIST_HEAD(&gt->closed_vma);
 	spin_lock_init(&gt->closed_lock);
......
@@ -48,8 +48,6 @@ struct intel_gt {
 		struct list_head hwsp_free_list;
 	} timelines;
 
-	struct list_head active_rings;
-
 	struct intel_wakeref wakeref;
 
 	struct list_head closed_vma;
......
@@ -1583,6 +1583,7 @@ static void execlists_context_unpin(struct intel_context *ce)
 {
 	i915_gem_context_unpin_hw_id(ce->gem_context);
 	i915_gem_object_unpin_map(ce->state->obj);
+	intel_ring_reset(ce->ring, ce->ring->tail);
 }
 
 static void
......
@@ -1250,7 +1250,7 @@ void intel_ring_unpin(struct intel_ring *ring)
 		return;
 
 	/* Discard any unused bytes beyond that submitted to hw. */
-	intel_ring_reset(ring, ring->tail);
+	intel_ring_reset(ring, ring->emit);
 
 	i915_vma_unset_ggtt_write(vma);
 	if (i915_vma_is_map_and_fenceable(vma))
@@ -1311,7 +1311,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 		return ERR_PTR(-ENOMEM);
 
 	kref_init(&ring->ref);
-	INIT_LIST_HEAD(&ring->request_list);
 	ring->size = size;
 
 	/* Workaround an erratum on the i830 which causes a hang if
@@ -1865,7 +1864,10 @@ static int ring_request_alloc(struct i915_request *request)
 	return 0;
 }
 
-static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
+static noinline int
+wait_for_space(struct intel_ring *ring,
+	       struct intel_timeline *tl,
+	       unsigned int bytes)
 {
 	struct i915_request *target;
 	long timeout;
@@ -1873,15 +1875,18 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
 	if (intel_ring_update_space(ring) >= bytes)
 		return 0;
 
-	GEM_BUG_ON(list_empty(&ring->request_list));
-	list_for_each_entry(target, &ring->request_list, ring_link) {
+	GEM_BUG_ON(list_empty(&tl->requests));
+	list_for_each_entry(target, &tl->requests, link) {
+		if (target->ring != ring)
+			continue;
+
 		/* Would completion of this request free enough space? */
 		if (bytes <= __intel_ring_space(target->postfix,
 						ring->emit, ring->size))
 			break;
 	}
 
-	if (WARN_ON(&target->ring_link == &ring->request_list))
+	if (GEM_WARN_ON(&target->link == &tl->requests))
 		return -ENOSPC;
 
 	timeout = i915_request_wait(target,
@@ -1948,7 +1953,7 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
 		 */
 		GEM_BUG_ON(!rq->reserved_space);
 
-		ret = wait_for_space(ring, total_bytes);
+		ret = wait_for_space(ring, rq->timeline, total_bytes);
 		if (unlikely(ret))
 			return ERR_PTR(ret);
 	}
......
@@ -58,7 +58,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 	ring->vaddr = (void *)(ring + 1);
 	atomic_set(&ring->pin_count, 1);
 
-	INIT_LIST_HEAD(&ring->request_list);
 	intel_ring_update_space(ring);
 
 	return ring;
......
@@ -20,10 +20,13 @@ static int request_sync(struct i915_request *rq)
 	i915_request_add(rq);
 	timeout = i915_request_wait(rq, 0, HZ / 10);
-	if (timeout < 0)
+	if (timeout < 0) {
 		err = timeout;
-	else
+	} else {
+		mutex_lock(&rq->timeline->mutex);
 		i915_request_retire_upto(rq);
+		mutex_unlock(&rq->timeline->mutex);
+	}
 
 	i915_request_put(rq);
@@ -35,6 +38,7 @@ static int context_sync(struct intel_context *ce)
 	struct intel_timeline *tl = ce->timeline;
 	int err = 0;
 
+	mutex_lock(&tl->mutex);
 	do {
 		struct i915_request *rq;
 		long timeout;
@@ -55,6 +59,7 @@ static int context_sync(struct intel_context *ce)
 		i915_request_put(rq);
 	} while (!err);
+	mutex_unlock(&tl->mutex);
 
 	return err;
 }
......
@@ -181,40 +181,6 @@ i915_request_remove_from_client(struct i915_request *request)
 	spin_unlock(&file_priv->mm.lock);
 }
 
-static void advance_ring(struct i915_request *request)
-{
-	struct intel_ring *ring = request->ring;
-	unsigned int tail;
-
-	/*
-	 * We know the GPU must have read the request to have
-	 * sent us the seqno + interrupt, so use the position
-	 * of tail of the request to update the last known position
-	 * of the GPU head.
-	 *
-	 * Note this requires that we are always called in request
-	 * completion order.
-	 */
-	GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list));
-	if (list_is_last(&request->ring_link, &ring->request_list)) {
-		/*
-		 * We may race here with execlists resubmitting this request
-		 * as we retire it. The resubmission will move the ring->tail
-		 * forwards (to request->wa_tail). We either read the
-		 * current value that was written to hw, or the value that
-		 * is just about to be. Either works, if we miss the last two
-		 * noops - they are safe to be replayed on a reset.
-		 */
-		tail = READ_ONCE(request->tail);
-		list_del(&ring->active_link);
-	} else {
-		tail = request->postfix;
-	}
-	list_del_init(&request->ring_link);
-
-	ring->head = tail;
-}
-
 static void free_capture_list(struct i915_request *request)
 {
 	struct i915_capture_list *capture;
@@ -232,7 +198,7 @@ static bool i915_request_retire(struct i915_request *rq)
 {
 	struct i915_active_request *active, *next;
 
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	lockdep_assert_held(&rq->timeline->mutex);
 	if (!i915_request_completed(rq))
 		return false;
@@ -244,7 +210,17 @@ static bool i915_request_retire(struct i915_request *rq)
 	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
 	trace_i915_request_retire(rq);
 
-	advance_ring(rq);
+	/*
+	 * We know the GPU must have read the request to have
+	 * sent us the seqno + interrupt, so use the position
+	 * of tail of the request to update the last known position
+	 * of the GPU head.
+	 *
+	 * Note this requires that we are always called in request
+	 * completion order.
+	 */
+	GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests));
+	rq->ring->head = rq->postfix;
 
 	/*
 	 * Walk through the active list, calling retire on each. This allows
@@ -321,7 +297,7 @@ static bool i915_request_retire(struct i915_request *rq)
 
 void i915_request_retire_upto(struct i915_request *rq)
 {
-	struct intel_ring *ring = rq->ring;
+	struct intel_timeline * const tl = rq->timeline;
 	struct i915_request *tmp;
 
 	GEM_TRACE("%s fence %llx:%lld, current %d\n",
@@ -329,15 +305,11 @@ void i915_request_retire_upto(struct i915_request *rq)
 		  rq->fence.context, rq->fence.seqno,
 		  hwsp_seqno(rq));
 
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	lockdep_assert_held(&tl->mutex);
 	GEM_BUG_ON(!i915_request_completed(rq));
 
-	if (list_empty(&rq->ring_link))
-		return;
-
 	do {
-		tmp = list_first_entry(&ring->request_list,
-				       typeof(*tmp), ring_link);
+		tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
 	} while (i915_request_retire(tmp) && tmp != rq);
 }
@@ -564,29 +536,28 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
-static void ring_retire_requests(struct intel_ring *ring)
+static void retire_requests(struct intel_timeline *tl)
 {
 	struct i915_request *rq, *rn;
 
-	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link)
+	list_for_each_entry_safe(rq, rn, &tl->requests, link)
 		if (!i915_request_retire(rq))
 			break;
 }
 
 static noinline struct i915_request *
-request_alloc_slow(struct intel_context *ce, gfp_t gfp)
+request_alloc_slow(struct intel_timeline *tl, gfp_t gfp)
 {
-	struct intel_ring *ring = ce->ring;
 	struct i915_request *rq;
 
-	if (list_empty(&ring->request_list))
+	if (list_empty(&tl->requests))
 		goto out;
 
 	if (!gfpflags_allow_blocking(gfp))
 		goto out;
 
 	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link);
+	rq = list_first_entry(&tl->requests, typeof(*rq), link);
 	i915_request_retire(rq);
 
 	rq = kmem_cache_alloc(global.slab_requests,
@@ -595,11 +566,11 @@ request_alloc_slow(struct intel_context *ce, gfp_t gfp)
 		return rq;
 
 	/* Ratelimit ourselves to prevent oom from malicious clients */
-	rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link);
+	rq = list_last_entry(&tl->requests, typeof(*rq), link);
 	cond_synchronize_rcu(rq->rcustate);
 
 	/* Retire our old requests in the hope that we free some */
-	ring_retire_requests(ring);
+	retire_requests(tl);
 
 out:
 	return kmem_cache_alloc(global.slab_requests, gfp);
@@ -650,7 +621,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq = kmem_cache_alloc(global.slab_requests,
 			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 	if (unlikely(!rq)) {
-		rq = request_alloc_slow(ce, gfp);
+		rq = request_alloc_slow(tl, gfp);
 		if (!rq) {
 			ret = -ENOMEM;
 			goto err_unreserve;
@@ -742,15 +713,15 @@ struct i915_request *
 i915_request_create(struct intel_context *ce)
 {
 	struct i915_request *rq;
-	int err;
+	struct intel_timeline *tl;
 
-	err = intel_context_timeline_lock(ce);
-	if (err)
-		return ERR_PTR(err);
+	tl = intel_context_timeline_lock(ce);
+	if (IS_ERR(tl))
+		return ERR_CAST(tl);
 
 	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
-	if (!list_is_last(&rq->ring_link, &ce->ring->request_list))
+	rq = list_first_entry(&tl->requests, typeof(*rq), link);
+	if (!list_is_last(&rq->link, &tl->requests))
 		i915_request_retire(rq);
 
 	intel_context_enter(ce);
@@ -760,22 +731,22 @@ i915_request_create(struct intel_context *ce)
 		goto err_unlock;
 
 	/* Check that we do not interrupt ourselves with a new request */
-	rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);
+	rq->cookie = lockdep_pin_lock(&tl->mutex);
 
 	return rq;
 
 err_unlock:
-	intel_context_timeline_unlock(ce);
+	intel_context_timeline_unlock(tl);
 	return rq;
 }
 
 static int
 i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
 {
-	if (list_is_first(&signal->ring_link, &signal->ring->request_list))
+	if (list_is_first(&signal->link, &signal->timeline->requests))
 		return 0;
 
-	signal = list_prev_entry(signal, ring_link);
+	signal = list_prev_entry(signal, link);
 	if (intel_timeline_sync_is_later(rq->timeline, &signal->fence))
 		return 0;
@@ -1155,7 +1126,6 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 {
 	struct intel_engine_cs *engine = rq->engine;
 	struct intel_ring *ring = rq->ring;
-	struct i915_request *prev;
 	u32 *cs;
 
 	GEM_TRACE("%s fence %llx:%lld\n",
@@ -1168,6 +1138,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 	 */
 	GEM_BUG_ON(rq->reserved_space > ring->space);
 	rq->reserved_space = 0;
+	rq->emitted_jiffies = jiffies;
 
 	/*
 	 * Record the position of the start of the breadcrumb so that
@@ -1179,14 +1150,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 	GEM_BUG_ON(IS_ERR(cs));
 	rq->postfix = intel_ring_offset(rq, cs);
 
-	prev = __i915_request_add_to_timeline(rq);
-
-	list_add_tail(&rq->ring_link, &ring->request_list);
-	if (list_is_first(&rq->ring_link, &ring->request_list))
-		list_add(&ring->active_link, &rq->i915->gt.active_rings);
-	rq->emitted_jiffies = jiffies;
-
-	return prev;
+	return __i915_request_add_to_timeline(rq);
 }
 
 void __i915_request_queue(struct i915_request *rq,
@@ -1212,10 +1176,11 @@ void __i915_request_queue(struct i915_request *rq,
 void i915_request_add(struct i915_request *rq)
 {
 	struct i915_sched_attr attr = rq->gem_context->sched;
+	struct intel_timeline * const tl = rq->timeline;
 	struct i915_request *prev;
 
-	lockdep_assert_held(&rq->timeline->mutex);
-	lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie);
+	lockdep_assert_held(&tl->mutex);
+	lockdep_unpin_lock(&tl->mutex, rq->cookie);
 
 	trace_i915_request_add(rq);
@@ -1266,10 +1231,10 @@ void i915_request_add(struct i915_request *rq)
 	 * work on behalf of others -- but instead we should benefit from
 	 * improved resource management. (Well, that's the theory at least.)
 	 */
-	if (prev && i915_request_completed(prev))
+	if (prev && i915_request_completed(prev) && prev->timeline == tl)
 		i915_request_retire_upto(prev);
 
-	mutex_unlock(&rq->timeline->mutex);
+	mutex_unlock(&tl->mutex);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
@@ -1489,18 +1454,43 @@ long i915_request_wait(struct i915_request *rq,
 
 bool i915_retire_requests(struct drm_i915_private *i915)
 {
-	struct intel_ring *ring, *tmp;
+	struct intel_gt_timelines *timelines = &i915->gt.timelines;
+	struct intel_timeline *tl, *tn;
+	LIST_HEAD(free);
+
+	spin_lock(&timelines->lock);
+	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
+		if (!mutex_trylock(&tl->mutex))
+			continue;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
+		intel_timeline_get(tl);
+		GEM_BUG_ON(!tl->active_count);
+		tl->active_count++; /* pin the list element */
+		spin_unlock(&timelines->lock);
 
-	list_for_each_entry_safe(ring, tmp,
-				 &i915->gt.active_rings, active_link) {
-		intel_ring_get(ring); /* last rq holds reference! */
-		ring_retire_requests(ring);
-		intel_ring_put(ring);
+		retire_requests(tl);
+
+		spin_lock(&timelines->lock);
+
+		/* Resume iteration after dropping lock */
+		list_safe_reset_next(tl, tn, link);
+		if (!--tl->active_count)
+			list_del(&tl->link);
+
+		mutex_unlock(&tl->mutex);
+
+		/* Defer the final release to after the spinlock */
+		if (refcount_dec_and_test(&tl->kref.refcount)) {
+			GEM_BUG_ON(tl->active_count);
+			list_add(&tl->link, &free);
+		}
 	}
+	spin_unlock(&timelines->lock);
+
+	list_for_each_entry_safe(tl, tn, &free, link)
+		__intel_timeline_free(&tl->kref);
 
-	return !list_empty(&i915->gt.active_rings);
+	return !list_empty(&timelines->active_list);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
......
@@ -223,9 +223,6 @@ struct i915_request {
 	/** timeline->request entry for this request */
 	struct list_head link;
 
-	/** ring->request_list entry for this request */
-	struct list_head ring_link;
-
 	struct drm_i915_file_private *file_priv;
 	/** file_priv list entry for this request */
 	struct list_head client_link;
......
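
As a usage note, intel_context_timeline_lock() now returns the locked timeline (or an ERR_PTR) and intel_context_timeline_unlock() takes that timeline rather than the context, so callers follow this shape — a minimal sketch, where the surrounding function is hypothetical:

static int do_work_on_timeline(struct intel_context *ce)
{
        struct intel_timeline *tl;

        tl = intel_context_timeline_lock(ce);   /* may fail with ERR_PTR() */
        if (IS_ERR(tl))
                return PTR_ERR(tl);

        /* ... create, emit or retire requests on tl->requests under tl->mutex ... */

        intel_context_timeline_unlock(tl);      /* now takes the timeline, not the context */
        return 0;
}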