Commit 2871ea85 authored by Chris Wilson's avatar Chris Wilson

drm/i915/gt: Split intel_ring_submission

Split the legacy submission backend from the common CS ring buffer
handling.
Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: default avatarMika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191024100344.5041-1-chris@chris-wilson.co.uk
parent 2c9a4915
...@@ -89,11 +89,12 @@ gt-y += \ ...@@ -89,11 +89,12 @@ gt-y += \
gt/intel_gt_requests.o \ gt/intel_gt_requests.o \
gt/intel_llc.o \ gt/intel_llc.o \
gt/intel_lrc.o \ gt/intel_lrc.o \
gt/intel_mocs.o \
gt/intel_rc6.o \ gt/intel_rc6.o \
gt/intel_renderstate.o \ gt/intel_renderstate.o \
gt/intel_reset.o \ gt/intel_reset.o \
gt/intel_ringbuffer.o \ gt/intel_ring.o \
gt/intel_mocs.o \ gt/intel_ring_submission.o \
gt/intel_sseu.o \ gt/intel_sseu.o \
gt/intel_timeline.o \ gt/intel_timeline.o \
gt/intel_workarounds.o gt/intel_workarounds.o
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <drm/i915_drm.h> #include <drm/i915_drm.h>
#include "gem/i915_gem_pm.h" #include "gem/i915_gem_pm.h"
#include "gt/intel_ring.h"
#include "i915_drv.h" #include "i915_drv.h"
#include "i915_reg.h" #include "i915_reg.h"
......
...@@ -69,9 +69,10 @@ ...@@ -69,9 +69,10 @@
#include <drm/i915_drm.h> #include <drm/i915_drm.h>
#include "gt/intel_lrc_reg.h"
#include "gt/intel_engine_heartbeat.h" #include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_user.h" #include "gt/intel_engine_user.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_ring.h"
#include "i915_gem_context.h" #include "i915_gem_context.h"
#include "i915_globals.h" #include "i915_globals.h"
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "gt/intel_engine_pool.h" #include "gt/intel_engine_pool.h"
#include "gt/intel_gt.h" #include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h" #include "gt/intel_gt_pm.h"
#include "gt/intel_ring.h"
#include "i915_drv.h" #include "i915_drv.h"
#include "i915_gem_clflush.h" #include "i915_gem_clflush.h"
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include "gt/intel_engine_pm.h" #include "gt/intel_engine_pm.h"
#include "gt/intel_engine_pool.h" #include "gt/intel_engine_pool.h"
#include "gt/intel_gt.h" #include "gt/intel_gt.h"
#include "gt/intel_ring.h"
#include "i915_gem_clflush.h" #include "i915_gem_clflush.h"
#include "i915_gem_object_blt.h" #include "i915_gem_object_blt.h"
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include "gt/intel_gt.h" #include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h" #include "gt/intel_gt_pm.h"
#include "gt/intel_ring.h"
#include "i915_selftest.h" #include "i915_selftest.h"
#include "selftests/i915_random.h" #include "selftests/i915_random.h"
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include "intel_context.h" #include "intel_context.h"
#include "intel_engine.h" #include "intel_engine.h"
#include "intel_engine_pm.h" #include "intel_engine_pm.h"
#include "intel_ring.h"
static struct i915_global_context { static struct i915_global_context {
struct i915_global base; struct i915_global base;
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include "i915_active.h" #include "i915_active.h"
#include "intel_context_types.h" #include "intel_context_types.h"
#include "intel_engine_types.h" #include "intel_engine_types.h"
#include "intel_ring_types.h"
#include "intel_timeline_types.h" #include "intel_timeline_types.h"
void intel_context_init(struct intel_context *ce, void intel_context_init(struct intel_context *ce,
......
...@@ -19,7 +19,6 @@ ...@@ -19,7 +19,6 @@
#include "intel_workarounds.h" #include "intel_workarounds.h"
struct drm_printer; struct drm_printer;
struct intel_gt; struct intel_gt;
/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
...@@ -176,122 +175,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) ...@@ -176,122 +175,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
#define I915_HWS_CSB_WRITE_INDEX 0x1f #define I915_HWS_CSB_WRITE_INDEX 0x1f
#define CNL_HWS_CSB_WRITE_INDEX 0x2f #define CNL_HWS_CSB_WRITE_INDEX 0x2f
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size);
int intel_ring_pin(struct intel_ring *ring);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
unsigned int intel_ring_update_space(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct kref *ref);
static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
{
kref_get(&ring->ref);
return ring;
}
static inline void intel_ring_put(struct intel_ring *ring)
{
kref_put(&ring->ref, intel_ring_free);
}
void intel_engine_stop(struct intel_engine_cs *engine); void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine); void intel_engine_cleanup(struct intel_engine_cs *engine);
int __must_check intel_ring_cacheline_align(struct i915_request *rq);
u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
{
/* Dummy function.
*
* This serves as a placeholder in the code so that the reader
* can compare against the preceding intel_ring_begin() and
* check that the number of dwords emitted matches the space
* reserved for the command packet (i.e. the value passed to
* intel_ring_begin()).
*/
GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
}
static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
return pos & (ring->size - 1);
}
static inline bool
intel_ring_offset_valid(const struct intel_ring *ring,
unsigned int pos)
{
if (pos & -ring->size) /* must be strictly within the ring */
return false;
if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
return false;
return true;
}
static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
{
/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
u32 offset = addr - rq->ring->vaddr;
GEM_BUG_ON(offset > rq->ring->size);
return intel_ring_wrap(rq->ring, offset);
}
static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
/*
* "Ring Buffer Use"
* Gen2 BSpec "1. Programming Environment" / 1.4.4.6
* Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
* Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
* "If the Ring Buffer Head Pointer and the Tail Pointer are on the
* same cacheline, the Head Pointer must not be greater than the Tail
* Pointer."
*
* We use ring->head as the last known location of the actual RING_HEAD,
* it may have advanced but in the worst case it is equally the same
* as ring->head and so we should never program RING_TAIL to advance
* into the same cacheline as ring->head.
*/
#define cacheline(a) round_down(a, CACHELINE_BYTES)
GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
tail < ring->head);
#undef cacheline
}
static inline unsigned int
intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
{
/* Whilst writes to the tail are strictly order, there is no
* serialisation between readers and the writers. The tail may be
* read by i915_request_retire() just as it is being updated
* by execlists, as although the breadcrumb is complete, the context
* switch hasn't been seen.
*/
assert_ring_tail_valid(ring, tail);
ring->tail = tail;
return tail;
}
static inline unsigned int
__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
{
/*
* "If the Ring Buffer Head Pointer and the Tail Pointer are on the
* same cacheline, the Head Pointer must not be greater than the Tail
* Pointer."
*/
GEM_BUG_ON(!is_power_of_2(size));
return (head - tail - CACHELINE_BYTES) & (size - 1);
}
int intel_engines_init_mmio(struct intel_gt *gt); int intel_engines_init_mmio(struct intel_gt *gt);
int intel_engines_setup(struct intel_gt *gt); int intel_engines_setup(struct intel_gt *gt);
int intel_engines_init(struct intel_gt *gt); int intel_engines_init(struct intel_gt *gt);
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include "intel_context.h" #include "intel_context.h"
#include "intel_lrc.h" #include "intel_lrc.h"
#include "intel_reset.h" #include "intel_reset.h"
#include "intel_ring.h"
/* Haswell does have the CXT_SIZE register however it does not appear to be /* Haswell does have the CXT_SIZE register however it does not appear to be
* valid. Now, docs explain in dwords what is in the context object. The full * valid. Now, docs explain in dwords what is in the context object. The full
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include "intel_gt.h" #include "intel_gt.h"
#include "intel_gt_pm.h" #include "intel_gt_pm.h"
#include "intel_rc6.h" #include "intel_rc6.h"
#include "intel_ring.h"
static int __engine_unpark(struct intel_wakeref *wf) static int __engine_unpark(struct intel_wakeref *wf)
{ {
......
...@@ -59,6 +59,7 @@ struct i915_gem_context; ...@@ -59,6 +59,7 @@ struct i915_gem_context;
struct i915_request; struct i915_request;
struct i915_sched_attr; struct i915_sched_attr;
struct intel_gt; struct intel_gt;
struct intel_ring;
struct intel_uncore; struct intel_uncore;
typedef u8 intel_engine_mask_t; typedef u8 intel_engine_mask_t;
...@@ -77,32 +78,6 @@ struct intel_instdone { ...@@ -77,32 +78,6 @@ struct intel_instdone {
u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
}; };
struct intel_ring {
struct kref ref;
struct i915_vma *vma;
void *vaddr;
/*
* As we have two types of rings, one global to the engine used
* by ringbuffer submission and those that are exclusive to a
* context used by execlists, we have to play safe and allow
* atomic updates to the pin_count. However, the actual pinning
* of the context is either done during initialisation for
* ringbuffer submission or serialised as part of the context
* pinning for execlists, and so we do not need a mutex ourselves
* to serialise intel_ring_pin/intel_ring_unpin.
*/
atomic_t pin_count;
u32 head;
u32 tail;
u32 emit;
u32 space;
u32 size;
u32 effective_size;
};
/* /*
* we use a single page to load ctx workarounds so all of these * we use a single page to load ctx workarounds so all of these
* values are referred in terms of dwords * values are referred in terms of dwords
......
...@@ -145,6 +145,7 @@ ...@@ -145,6 +145,7 @@
#include "intel_lrc_reg.h" #include "intel_lrc_reg.h"
#include "intel_mocs.h" #include "intel_mocs.h"
#include "intel_reset.h" #include "intel_reset.h"
#include "intel_ring.h"
#include "intel_workarounds.h" #include "intel_workarounds.h"
#define RING_EXECLIST_QFULL (1 << 0x2) #define RING_EXECLIST_QFULL (1 << 0x2)
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "intel_gt.h" #include "intel_gt.h"
#include "intel_mocs.h" #include "intel_mocs.h"
#include "intel_lrc.h" #include "intel_lrc.h"
#include "intel_ring.h"
/* structures required */ /* structures required */
struct drm_i915_mocs_entry { struct drm_i915_mocs_entry {
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "i915_drv.h" #include "i915_drv.h"
#include "intel_renderstate.h" #include "intel_renderstate.h"
#include "intel_ring.h"
struct intel_renderstate { struct intel_renderstate {
const struct intel_renderstate_rodata *rodata; const struct intel_renderstate_rodata *rodata;
......
/*
* SPDX-License-Identifier: MIT
*
* Copyright © 2019 Intel Corporation
*/
#include "gem/i915_gem_object.h"
#include "i915_drv.h"
#include "i915_vma.h"
#include "intel_engine.h"
#include "intel_ring.h"
#include "intel_timeline.h"
unsigned int intel_ring_update_space(struct intel_ring *ring)
{
unsigned int space;
space = __intel_ring_space(ring->head, ring->emit, ring->size);
ring->space = space;
return space;
}
int intel_ring_pin(struct intel_ring *ring)
{
struct i915_vma *vma = ring->vma;
unsigned int flags;
void *addr;
int ret;
if (atomic_fetch_inc(&ring->pin_count))
return 0;
flags = PIN_GLOBAL;
/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
if (vma->obj->stolen)
flags |= PIN_MAPPABLE;
else
flags |= PIN_HIGH;
ret = i915_vma_pin(vma, 0, 0, flags);
if (unlikely(ret))
goto err_unpin;
if (i915_vma_is_map_and_fenceable(vma))
addr = (void __force *)i915_vma_pin_iomap(vma);
else
addr = i915_gem_object_pin_map(vma->obj,
i915_coherent_map_type(vma->vm->i915));
if (IS_ERR(addr)) {
ret = PTR_ERR(addr);
goto err_ring;
}
i915_vma_make_unshrinkable(vma);
GEM_BUG_ON(ring->vaddr);
ring->vaddr = addr;
return 0;
err_ring:
i915_vma_unpin(vma);
err_unpin:
atomic_dec(&ring->pin_count);
return ret;
}
void intel_ring_reset(struct intel_ring *ring, u32 tail)
{
tail = intel_ring_wrap(ring, tail);
ring->tail = tail;
ring->head = tail;
ring->emit = tail;
intel_ring_update_space(ring);
}
void intel_ring_unpin(struct intel_ring *ring)
{
struct i915_vma *vma = ring->vma;
if (!atomic_dec_and_test(&ring->pin_count))
return;
/* Discard any unused bytes beyond that submitted to hw. */
intel_ring_reset(ring, ring->emit);
i915_vma_unset_ggtt_write(vma);
if (i915_vma_is_map_and_fenceable(vma))
i915_vma_unpin_iomap(vma);
else
i915_gem_object_unpin_map(vma->obj);
GEM_BUG_ON(!ring->vaddr);
ring->vaddr = NULL;
i915_vma_unpin(vma);
i915_vma_make_purgeable(vma);
}
static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
{
struct i915_address_space *vm = &ggtt->vm;
struct drm_i915_private *i915 = vm->i915;
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
obj = i915_gem_object_create_stolen(i915, size);
if (IS_ERR(obj))
obj = i915_gem_object_create_internal(i915, size);
if (IS_ERR(obj))
return ERR_CAST(obj);
/*
* Mark ring buffers as read-only from GPU side (so no stray overwrites)
* if supported by the platform's GGTT.
*/
if (vm->has_read_only)
i915_gem_object_set_readonly(obj);
vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma))
goto err;
return vma;
err:
i915_gem_object_put(obj);
return vma;
}
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size)
{
struct drm_i915_private *i915 = engine->i915;
struct intel_ring *ring;
struct i915_vma *vma;
GEM_BUG_ON(!is_power_of_2(size));
GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
ring = kzalloc(sizeof(*ring), GFP_KERNEL);
if (!ring)
return ERR_PTR(-ENOMEM);
kref_init(&ring->ref);
ring->size = size;
/*
* Workaround an erratum on the i830 which causes a hang if
* the TAIL pointer points to within the last 2 cachelines
* of the buffer.
*/
ring->effective_size = size;
if (IS_I830(i915) || IS_I845G(i915))
ring->effective_size -= 2 * CACHELINE_BYTES;
intel_ring_update_space(ring);
vma = create_ring_vma(engine->gt->ggtt, size);
if (IS_ERR(vma)) {
kfree(ring);
return ERR_CAST(vma);
}
ring->vma = vma;
return ring;
}
void intel_ring_free(struct kref *ref)
{
struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
i915_vma_put(ring->vma);
kfree(ring);
}
static noinline int
wait_for_space(struct intel_ring *ring,
struct intel_timeline *tl,
unsigned int bytes)
{
struct i915_request *target;
long timeout;
if (intel_ring_update_space(ring) >= bytes)
return 0;
GEM_BUG_ON(list_empty(&tl->requests));
list_for_each_entry(target, &tl->requests, link) {
if (target->ring != ring)
continue;
/* Would completion of this request free enough space? */
if (bytes <= __intel_ring_space(target->postfix,
ring->emit, ring->size))
break;
}
if (GEM_WARN_ON(&target->link == &tl->requests))
return -ENOSPC;
timeout = i915_request_wait(target,
I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT);
if (timeout < 0)
return timeout;
i915_request_retire_upto(target);
intel_ring_update_space(ring);
GEM_BUG_ON(ring->space < bytes);
return 0;
}
u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
{
struct intel_ring *ring = rq->ring;
const unsigned int remain_usable = ring->effective_size - ring->emit;
const unsigned int bytes = num_dwords * sizeof(u32);
unsigned int need_wrap = 0;
unsigned int total_bytes;
u32 *cs;
/* Packets must be qword aligned. */
GEM_BUG_ON(num_dwords & 1);
total_bytes = bytes + rq->reserved_space;
GEM_BUG_ON(total_bytes > ring->effective_size);
if (unlikely(total_bytes > remain_usable)) {
const int remain_actual = ring->size - ring->emit;
if (bytes > remain_usable) {
/*
* Not enough space for the basic request. So need to
* flush out the remainder and then wait for
* base + reserved.
*/
total_bytes += remain_actual;
need_wrap = remain_actual | 1;
} else {
/*
* The base request will fit but the reserved space
* falls off the end. So we don't need an immediate
* wrap and only need to effectively wait for the
* reserved size from the start of ringbuffer.
*/
total_bytes = rq->reserved_space + remain_actual;
}
}
if (unlikely(total_bytes > ring->space)) {
int ret;
/*
* Space is reserved in the ringbuffer for finalising the
* request, as that cannot be allowed to fail. During request
* finalisation, reserved_space is set to 0 to stop the
* overallocation and the assumption is that then we never need
* to wait (which has the risk of failing with EINTR).
*
* See also i915_request_alloc() and i915_request_add().
*/
GEM_BUG_ON(!rq->reserved_space);
ret = wait_for_space(ring,
i915_request_timeline(rq),
total_bytes);
if (unlikely(ret))
return ERR_PTR(ret);
}
if (unlikely(need_wrap)) {
need_wrap &= ~1;
GEM_BUG_ON(need_wrap > ring->space);
GEM_BUG_ON(ring->emit + need_wrap > ring->size);
GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
/* Fill the tail with MI_NOOP */
memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
ring->space -= need_wrap;
ring->emit = 0;
}
GEM_BUG_ON(ring->emit > ring->size - bytes);
GEM_BUG_ON(ring->space < bytes);
cs = ring->vaddr + ring->emit;
GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
ring->emit += bytes;
ring->space -= bytes;
return cs;
}
/* Align the ring tail to a cacheline boundary */
int intel_ring_cacheline_align(struct i915_request *rq)
{
int num_dwords;
void *cs;
num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
if (num_dwords == 0)
return 0;
num_dwords = CACHELINE_DWORDS - num_dwords;
GEM_BUG_ON(num_dwords & 1);
cs = intel_ring_begin(rq, num_dwords);
if (IS_ERR(cs))
return PTR_ERR(cs);
memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
intel_ring_advance(rq, cs + num_dwords);
GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
return 0;
}
/*
* SPDX-License-Identifier: MIT
*
* Copyright © 2019 Intel Corporation
*/
#ifndef INTEL_RING_H
#define INTEL_RING_H
#include "i915_gem.h" /* GEM_BUG_ON */
#include "i915_request.h"
#include "intel_ring_types.h"
struct intel_engine_cs;
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size);
u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords);
int intel_ring_cacheline_align(struct i915_request *rq);
unsigned int intel_ring_update_space(struct intel_ring *ring);
int intel_ring_pin(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
void intel_ring_free(struct kref *ref);
static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
{
kref_get(&ring->ref);
return ring;
}
static inline void intel_ring_put(struct intel_ring *ring)
{
kref_put(&ring->ref, intel_ring_free);
}
static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
{
/* Dummy function.
*
* This serves as a placeholder in the code so that the reader
* can compare against the preceding intel_ring_begin() and
* check that the number of dwords emitted matches the space
* reserved for the command packet (i.e. the value passed to
* intel_ring_begin()).
*/
GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
}
static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
return pos & (ring->size - 1);
}
static inline bool
intel_ring_offset_valid(const struct intel_ring *ring,
unsigned int pos)
{
if (pos & -ring->size) /* must be strictly within the ring */
return false;
if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
return false;
return true;
}
static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
{
/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
u32 offset = addr - rq->ring->vaddr;
GEM_BUG_ON(offset > rq->ring->size);
return intel_ring_wrap(rq->ring, offset);
}
static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
/*
* "Ring Buffer Use"
* Gen2 BSpec "1. Programming Environment" / 1.4.4.6
* Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
* Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
* "If the Ring Buffer Head Pointer and the Tail Pointer are on the
* same cacheline, the Head Pointer must not be greater than the Tail
* Pointer."
*
* We use ring->head as the last known location of the actual RING_HEAD,
* it may have advanced but in the worst case it is equally the same
* as ring->head and so we should never program RING_TAIL to advance
* into the same cacheline as ring->head.
*/
#define cacheline(a) round_down(a, CACHELINE_BYTES)
GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
tail < ring->head);
#undef cacheline
}
static inline unsigned int
intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
{
/* Whilst writes to the tail are strictly order, there is no
* serialisation between readers and the writers. The tail may be
* read by i915_request_retire() just as it is being updated
* by execlists, as although the breadcrumb is complete, the context
* switch hasn't been seen.
*/
assert_ring_tail_valid(ring, tail);
ring->tail = tail;
return tail;
}
static inline unsigned int
__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
{
/*
* "If the Ring Buffer Head Pointer and the Tail Pointer are on the
* same cacheline, the Head Pointer must not be greater than the Tail
* Pointer."
*/
GEM_BUG_ON(!is_power_of_2(size));
return (head - tail - CACHELINE_BYTES) & (size - 1);
}
#endif /* INTEL_RING_H */
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
#include "intel_gt_irq.h" #include "intel_gt_irq.h"
#include "intel_gt_pm_irq.h" #include "intel_gt_pm_irq.h"
#include "intel_reset.h" #include "intel_reset.h"
#include "intel_ring.h"
#include "intel_workarounds.h" #include "intel_workarounds.h"
/* Rough estimate of the typical request size, performing a flush, /* Rough estimate of the typical request size, performing a flush,
...@@ -47,16 +48,6 @@ ...@@ -47,16 +48,6 @@
*/ */
#define LEGACY_REQUEST_SIZE 200 #define LEGACY_REQUEST_SIZE 200
unsigned int intel_ring_update_space(struct intel_ring *ring)
{
unsigned int space;
space = __intel_ring_space(ring->head, ring->emit, ring->size);
ring->space = space;
return space;
}
static int static int
gen2_render_ring_flush(struct i915_request *rq, u32 mode) gen2_render_ring_flush(struct i915_request *rq, u32 mode)
{ {
...@@ -1186,162 +1177,6 @@ i915_emit_bb_start(struct i915_request *rq, ...@@ -1186,162 +1177,6 @@ i915_emit_bb_start(struct i915_request *rq,
return 0; return 0;
} }
int intel_ring_pin(struct intel_ring *ring)
{
struct i915_vma *vma = ring->vma;
unsigned int flags;
void *addr;
int ret;
if (atomic_fetch_inc(&ring->pin_count))
return 0;
flags = PIN_GLOBAL;
/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
if (vma->obj->stolen)
flags |= PIN_MAPPABLE;
else
flags |= PIN_HIGH;
ret = i915_vma_pin(vma, 0, 0, flags);
if (unlikely(ret))
goto err_unpin;
if (i915_vma_is_map_and_fenceable(vma))
addr = (void __force *)i915_vma_pin_iomap(vma);
else
addr = i915_gem_object_pin_map(vma->obj,
i915_coherent_map_type(vma->vm->i915));
if (IS_ERR(addr)) {
ret = PTR_ERR(addr);
goto err_ring;
}
i915_vma_make_unshrinkable(vma);
GEM_BUG_ON(ring->vaddr);
ring->vaddr = addr;
return 0;
err_ring:
i915_vma_unpin(vma);
err_unpin:
atomic_dec(&ring->pin_count);
return ret;
}
void intel_ring_reset(struct intel_ring *ring, u32 tail)
{
tail = intel_ring_wrap(ring, tail);
ring->tail = tail;
ring->head = tail;
ring->emit = tail;
intel_ring_update_space(ring);
}
void intel_ring_unpin(struct intel_ring *ring)
{
struct i915_vma *vma = ring->vma;
if (!atomic_dec_and_test(&ring->pin_count))
return;
/* Discard any unused bytes beyond that submitted to hw. */
intel_ring_reset(ring, ring->emit);
i915_vma_unset_ggtt_write(vma);
if (i915_vma_is_map_and_fenceable(vma))
i915_vma_unpin_iomap(vma);
else
i915_gem_object_unpin_map(vma->obj);
GEM_BUG_ON(!ring->vaddr);
ring->vaddr = NULL;
i915_vma_unpin(vma);
i915_vma_make_purgeable(vma);
}
static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
{
struct i915_address_space *vm = &ggtt->vm;
struct drm_i915_private *i915 = vm->i915;
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
obj = i915_gem_object_create_stolen(i915, size);
if (IS_ERR(obj))
obj = i915_gem_object_create_internal(i915, size);
if (IS_ERR(obj))
return ERR_CAST(obj);
/*
* Mark ring buffers as read-only from GPU side (so no stray overwrites)
* if supported by the platform's GGTT.
*/
if (vm->has_read_only)
i915_gem_object_set_readonly(obj);
vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma))
goto err;
return vma;
err:
i915_gem_object_put(obj);
return vma;
}
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size)
{
struct drm_i915_private *i915 = engine->i915;
struct intel_ring *ring;
struct i915_vma *vma;
GEM_BUG_ON(!is_power_of_2(size));
GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
ring = kzalloc(sizeof(*ring), GFP_KERNEL);
if (!ring)
return ERR_PTR(-ENOMEM);
kref_init(&ring->ref);
ring->size = size;
/* Workaround an erratum on the i830 which causes a hang if
* the TAIL pointer points to within the last 2 cachelines
* of the buffer.
*/
ring->effective_size = size;
if (IS_I830(i915) || IS_I845G(i915))
ring->effective_size -= 2 * CACHELINE_BYTES;
intel_ring_update_space(ring);
vma = create_ring_vma(engine->gt->ggtt, size);
if (IS_ERR(vma)) {
kfree(ring);
return ERR_CAST(vma);
}
ring->vma = vma;
return ring;
}
void intel_ring_free(struct kref *ref)
{
struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
i915_vma_put(ring->vma);
kfree(ring);
}
static void __ring_context_fini(struct intel_context *ce) static void __ring_context_fini(struct intel_context *ce)
{ {
i915_vma_put(ce->state); i915_vma_put(ce->state);
...@@ -1836,148 +1671,6 @@ static int ring_request_alloc(struct i915_request *request) ...@@ -1836,148 +1671,6 @@ static int ring_request_alloc(struct i915_request *request)
return 0; return 0;
} }
static noinline int
wait_for_space(struct intel_ring *ring,
struct intel_timeline *tl,
unsigned int bytes)
{
struct i915_request *target;
long timeout;
if (intel_ring_update_space(ring) >= bytes)
return 0;
GEM_BUG_ON(list_empty(&tl->requests));
list_for_each_entry(target, &tl->requests, link) {
if (target->ring != ring)
continue;
/* Would completion of this request free enough space? */
if (bytes <= __intel_ring_space(target->postfix,
ring->emit, ring->size))
break;
}
if (GEM_WARN_ON(&target->link == &tl->requests))
return -ENOSPC;
timeout = i915_request_wait(target,
I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT);
if (timeout < 0)
return timeout;
i915_request_retire_upto(target);
intel_ring_update_space(ring);
GEM_BUG_ON(ring->space < bytes);
return 0;
}
u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
{
struct intel_ring *ring = rq->ring;
const unsigned int remain_usable = ring->effective_size - ring->emit;
const unsigned int bytes = num_dwords * sizeof(u32);
unsigned int need_wrap = 0;
unsigned int total_bytes;
u32 *cs;
/* Packets must be qword aligned. */
GEM_BUG_ON(num_dwords & 1);
total_bytes = bytes + rq->reserved_space;
GEM_BUG_ON(total_bytes > ring->effective_size);
if (unlikely(total_bytes > remain_usable)) {
const int remain_actual = ring->size - ring->emit;
if (bytes > remain_usable) {
/*
* Not enough space for the basic request. So need to
* flush out the remainder and then wait for
* base + reserved.
*/
total_bytes += remain_actual;
need_wrap = remain_actual | 1;
} else {
/*
* The base request will fit but the reserved space
* falls off the end. So we don't need an immediate
* wrap and only need to effectively wait for the
* reserved size from the start of ringbuffer.
*/
total_bytes = rq->reserved_space + remain_actual;
}
}
if (unlikely(total_bytes > ring->space)) {
int ret;
/*
* Space is reserved in the ringbuffer for finalising the
* request, as that cannot be allowed to fail. During request
* finalisation, reserved_space is set to 0 to stop the
* overallocation and the assumption is that then we never need
* to wait (which has the risk of failing with EINTR).
*
* See also i915_request_alloc() and i915_request_add().
*/
GEM_BUG_ON(!rq->reserved_space);
ret = wait_for_space(ring,
i915_request_timeline(rq),
total_bytes);
if (unlikely(ret))
return ERR_PTR(ret);
}
if (unlikely(need_wrap)) {
need_wrap &= ~1;
GEM_BUG_ON(need_wrap > ring->space);
GEM_BUG_ON(ring->emit + need_wrap > ring->size);
GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
/* Fill the tail with MI_NOOP */
memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
ring->space -= need_wrap;
ring->emit = 0;
}
GEM_BUG_ON(ring->emit > ring->size - bytes);
GEM_BUG_ON(ring->space < bytes);
cs = ring->vaddr + ring->emit;
GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
ring->emit += bytes;
ring->space -= bytes;
return cs;
}
/* Align the ring tail to a cacheline boundary */
int intel_ring_cacheline_align(struct i915_request *rq)
{
int num_dwords;
void *cs;
num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
if (num_dwords == 0)
return 0;
num_dwords = CACHELINE_DWORDS - num_dwords;
GEM_BUG_ON(num_dwords & 1);
cs = intel_ring_begin(rq, num_dwords);
if (IS_ERR(cs))
return PTR_ERR(cs);
memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
intel_ring_advance(rq, cs);
GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
return 0;
}
static void gen6_bsd_submit_request(struct i915_request *request) static void gen6_bsd_submit_request(struct i915_request *request)
{ {
struct intel_uncore *uncore = request->engine->uncore; struct intel_uncore *uncore = request->engine->uncore;
......
/*
* SPDX-License-Identifier: MIT
*
* Copyright © 2019 Intel Corporation
*/
#ifndef INTEL_RING_TYPES_H
#define INTEL_RING_TYPES_H
#include <linux/atomic.h>
#include <linux/kref.h>
#include <linux/types.h>
/*
* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
* but keeps the logic simple. Indeed, the whole purpose of this macro is just
* to give some inclination as to some of the magic values used in the various
* workarounds!
*/
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))
struct i915_vma;
struct intel_ring {
struct kref ref;
struct i915_vma *vma;
void *vaddr;
/*
* As we have two types of rings, one global to the engine used
* by ringbuffer submission and those that are exclusive to a
* context used by execlists, we have to play safe and allow
* atomic updates to the pin_count. However, the actual pinning
* of the context is either done during initialisation for
* ringbuffer submission or serialised as part of the context
* pinning for execlists, and so we do not need a mutex ourselves
* to serialise intel_ring_pin/intel_ring_unpin.
*/
atomic_t pin_count;
u32 head;
u32 tail;
u32 emit;
u32 space;
u32 size;
u32 effective_size;
};
#endif /* INTEL_RING_TYPES_H */
...@@ -4,13 +4,13 @@ ...@@ -4,13 +4,13 @@
* Copyright © 2016-2018 Intel Corporation * Copyright © 2016-2018 Intel Corporation
*/ */
#include "gt/intel_gt_types.h"
#include "i915_drv.h" #include "i915_drv.h"
#include "i915_active.h" #include "i915_active.h"
#include "i915_syncmap.h" #include "i915_syncmap.h"
#include "gt/intel_timeline.h" #include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"
#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit))) #define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit)) #define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include "i915_drv.h" #include "i915_drv.h"
#include "intel_context.h" #include "intel_context.h"
#include "intel_gt.h" #include "intel_gt.h"
#include "intel_ring.h"
#include "intel_workarounds.h" #include "intel_workarounds.h"
/** /**
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
*/ */
#include "gem/i915_gem_context.h" #include "gem/i915_gem_context.h"
#include "gt/intel_ring.h"
#include "i915_drv.h" #include "i915_drv.h"
#include "intel_context.h" #include "intel_context.h"
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include "intel_engine_pm.h" #include "intel_engine_pm.h"
#include "intel_gt.h" #include "intel_gt.h"
#include "intel_gt_requests.h" #include "intel_gt_requests.h"
#include "intel_ring.h"
#include "../selftests/i915_random.h" #include "../selftests/i915_random.h"
#include "../i915_selftest.h" #include "../i915_selftest.h"
......
...@@ -6,12 +6,13 @@ ...@@ -6,12 +6,13 @@
#include <linux/circ_buf.h> #include <linux/circ_buf.h>
#include "gem/i915_gem_context.h" #include "gem/i915_gem_context.h"
#include "gt/intel_context.h" #include "gt/intel_context.h"
#include "gt/intel_engine_pm.h" #include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h" #include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h" #include "gt/intel_gt_pm.h"
#include "gt/intel_lrc_reg.h" #include "gt/intel_lrc_reg.h"
#include "gt/intel_ring.h"
#include "intel_guc_submission.h" #include "intel_guc_submission.h"
#include "i915_drv.h" #include "i915_drv.h"
......
...@@ -35,7 +35,9 @@ ...@@ -35,7 +35,9 @@
*/ */
#include <linux/slab.h> #include <linux/slab.h>
#include "i915_drv.h" #include "i915_drv.h"
#include "gt/intel_ring.h"
#include "gvt.h" #include "gvt.h"
#include "i915_pvinfo.h" #include "i915_pvinfo.h"
#include "trace.h" #include "trace.h"
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include "i915_drv.h" #include "i915_drv.h"
#include "gt/intel_context.h" #include "gt/intel_context.h"
#include "gt/intel_ring.h"
#include "gvt.h" #include "gvt.h"
#include "trace.h" #include "trace.h"
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include "gem/i915_gem_context.h" #include "gem/i915_gem_context.h"
#include "gem/i915_gem_pm.h" #include "gem/i915_gem_pm.h"
#include "gt/intel_context.h" #include "gt/intel_context.h"
#include "gt/intel_ring.h"
#include "i915_drv.h" #include "i915_drv.h"
#include "gvt.h" #include "gvt.h"
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include <linux/debugobjects.h> #include <linux/debugobjects.h>
#include "gt/intel_engine_pm.h" #include "gt/intel_engine_pm.h"
#include "gt/intel_ring.h"
#include "i915_drv.h" #include "i915_drv.h"
#include "i915_active.h" #include "i915_active.h"
......
...@@ -200,6 +200,7 @@ ...@@ -200,6 +200,7 @@
#include "gt/intel_engine_user.h" #include "gt/intel_engine_user.h"
#include "gt/intel_gt.h" #include "gt/intel_gt.h"
#include "gt/intel_lrc_reg.h" #include "gt/intel_lrc_reg.h"
#include "gt/intel_ring.h"
#include "i915_drv.h" #include "i915_drv.h"
#include "i915_perf.h" #include "i915_perf.h"
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include "gem/i915_gem_context.h" #include "gem/i915_gem_context.h"
#include "gt/intel_context.h" #include "gt/intel_context.h"
#include "gt/intel_ring.h"
#include "i915_active.h" #include "i915_active.h"
#include "i915_drv.h" #include "i915_drv.h"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment