Commit d9e86c0e authored by Chris Wilson

drm/i915: Pipelined fencing [infrastructure]

With this change, every batchbuffer can use all available fences (save
pinned and scanout, of course) without ever stalling the gpu!

In theory. Currently the actual pipelined update of the register is
disabled due to some stability issues. However, just the deferred update
is a significant win.

Based on a series of patches by Daniel Vetter.

The premise is that before every access to a buffer through the GTT we
have to declare whether we need a fence register or not. If the access is
by the GPU, a pipelined update to the register is made via the ringbuffer,
and we track the last seqno of the batches that access it. If the access
is by the CPU, we wait for the last GPU access and update the register
(either to clear it or to set it for the current buffer).

One advantage of being able to pipeline changes is that we can defer the
actual updating of the fence register until we first need to access the
object through the GTT, i.e. we can eliminate the stall on set_tiling.
This is important as the userspace bo cache does not track the tiling
status of active buffers, which generates frequent stalls on gen3 when
enabling tiling for an already bound buffer.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
parent 87ca9c8a
...@@ -126,6 +126,7 @@ struct drm_i915_master_private { ...@@ -126,6 +126,7 @@ struct drm_i915_master_private {
struct drm_i915_fence_reg { struct drm_i915_fence_reg {
struct list_head lru_list; struct list_head lru_list;
struct drm_i915_gem_object *obj; struct drm_i915_gem_object *obj;
uint32_t setup_seqno;
}; };
struct sdvo_device_mapping { struct sdvo_device_mapping {
...@@ -752,6 +753,7 @@ struct drm_i915_gem_object { ...@@ -752,6 +753,7 @@ struct drm_i915_gem_object {
* Current tiling mode for the object. * Current tiling mode for the object.
*/ */
unsigned int tiling_mode : 2; unsigned int tiling_mode : 2;
unsigned int tiling_changed : 1;
/** How many users have pinned this object in GTT space. The following /** How many users have pinned this object in GTT space. The following
* users can each hold at most one reference: pwrite/pread, pin_ioctl * users can each hold at most one reference: pwrite/pread, pin_ioctl
...@@ -1121,10 +1123,10 @@ i915_gem_next_request_seqno(struct drm_device *dev, ...@@ -1121,10 +1123,10 @@ i915_gem_next_request_seqno(struct drm_device *dev,
return ring->outstanding_lazy_request = dev_priv->next_seqno; return ring->outstanding_lazy_request = dev_priv->next_seqno;
} }
int __must_check i915_gem_object_get_fence_reg(struct drm_i915_gem_object *obj, int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
bool interruptible); struct intel_ring_buffer *pipelined,
int __must_check i915_gem_object_put_fence_reg(struct drm_i915_gem_object *obj, bool interruptible);
bool interruptible); int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
void i915_gem_retire_requests(struct drm_device *dev); void i915_gem_retire_requests(struct drm_device *dev);
void i915_gem_reset(struct drm_device *dev); void i915_gem_reset(struct drm_device *dev);
......
This diff is collapsed.
...@@ -424,7 +424,7 @@ i915_gem_execbuffer_relocate(struct drm_device *dev, ...@@ -424,7 +424,7 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
} }
static int static int
i915_gem_execbuffer_reserve(struct drm_device *dev, i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
struct drm_file *file, struct drm_file *file,
struct list_head *objects, struct list_head *objects,
struct drm_i915_gem_exec_object2 *exec) struct drm_i915_gem_exec_object2 *exec)
...@@ -499,10 +499,15 @@ i915_gem_execbuffer_reserve(struct drm_device *dev, ...@@ -499,10 +499,15 @@ i915_gem_execbuffer_reserve(struct drm_device *dev,
} }
if (need_fence) { if (need_fence) {
ret = i915_gem_object_get_fence_reg(obj, true); ret = i915_gem_object_get_fence(obj, ring, 1);
if (ret)
break;
} else if (entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode == I915_TILING_NONE) {
/* XXX pipelined! */
ret = i915_gem_object_put_fence(obj);
if (ret) if (ret)
break; break;
} }
obj->pending_fenced_gpu_access = need_fence; obj->pending_fenced_gpu_access = need_fence;
...@@ -522,7 +527,7 @@ i915_gem_execbuffer_reserve(struct drm_device *dev, ...@@ -522,7 +527,7 @@ i915_gem_execbuffer_reserve(struct drm_device *dev,
/* First attempt, just clear anything that is purgeable. /* First attempt, just clear anything that is purgeable.
* Second attempt, clear the entire GTT. * Second attempt, clear the entire GTT.
*/ */
ret = i915_gem_evict_everything(dev, retry == 0); ret = i915_gem_evict_everything(ring->dev, retry == 0);
if (ret) if (ret)
return ret; return ret;
...@@ -548,6 +553,7 @@ i915_gem_execbuffer_reserve(struct drm_device *dev, ...@@ -548,6 +553,7 @@ i915_gem_execbuffer_reserve(struct drm_device *dev,
static int static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev, i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
struct drm_file *file, struct drm_file *file,
struct intel_ring_buffer *ring,
struct list_head *objects, struct list_head *objects,
struct drm_i915_gem_exec_object2 *exec, struct drm_i915_gem_exec_object2 *exec,
int count) int count)
...@@ -590,7 +596,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, ...@@ -590,7 +596,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
goto err; goto err;
} }
ret = i915_gem_execbuffer_reserve(dev, file, objects, exec); ret = i915_gem_execbuffer_reserve(ring, file, objects, exec);
if (ret) if (ret)
goto err; goto err;
...@@ -930,7 +936,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, ...@@ -930,7 +936,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
} }
/* Move the objects en-masse into the GTT, evicting if necessary. */ /* Move the objects en-masse into the GTT, evicting if necessary. */
ret = i915_gem_execbuffer_reserve(dev, file, &objects, exec); ret = i915_gem_execbuffer_reserve(ring, file, &objects, exec);
if (ret) if (ret)
goto err; goto err;
...@@ -938,7 +944,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, ...@@ -938,7 +944,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
ret = i915_gem_execbuffer_relocate(dev, file, &objects, exec); ret = i915_gem_execbuffer_relocate(dev, file, &objects, exec);
if (ret) { if (ret) {
if (ret == -EFAULT) { if (ret == -EFAULT) {
ret = i915_gem_execbuffer_relocate_slow(dev, file, ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
&objects, exec, &objects, exec,
args->buffer_count); args->buffer_count);
BUG_ON(!mutex_is_locked(&dev->struct_mutex)); BUG_ON(!mutex_is_locked(&dev->struct_mutex));
......
...@@ -244,9 +244,6 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode) ...@@ -244,9 +244,6 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
if (INTEL_INFO(obj->base.dev)->gen >= 4) if (INTEL_INFO(obj->base.dev)->gen >= 4)
return true; return true;
if (!obj->gtt_space)
return true;
if (INTEL_INFO(obj->base.dev)->gen == 3) { if (INTEL_INFO(obj->base.dev)->gen == 3) {
if (obj->gtt_offset & ~I915_FENCE_START_MASK) if (obj->gtt_offset & ~I915_FENCE_START_MASK)
return false; return false;
...@@ -345,27 +342,21 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, ...@@ -345,27 +342,21 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
* tiling mode. Otherwise we can just leave it alone, but * tiling mode. Otherwise we can just leave it alone, but
* need to ensure that any fence register is cleared. * need to ensure that any fence register is cleared.
*/ */
if (!i915_gem_object_fence_ok(obj, args->tiling_mode)) i915_gem_release_mmap(obj);
ret = i915_gem_object_unbind(obj);
else if (obj->fence_reg != I915_FENCE_REG_NONE)
ret = i915_gem_object_put_fence_reg(obj, true);
else
i915_gem_release_mmap(obj);
if (ret != 0) { obj->map_and_fenceable =
args->tiling_mode = obj->tiling_mode; obj->gtt_space == NULL ||
args->stride = obj->stride; (obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end &&
goto err; i915_gem_object_fence_ok(obj, args->tiling_mode));
}
obj->tiling_changed = true;
obj->tiling_mode = args->tiling_mode; obj->tiling_mode = args->tiling_mode;
obj->stride = args->stride; obj->stride = args->stride;
} }
err:
drm_gem_object_unreference(&obj->base); drm_gem_object_unreference(&obj->base);
mutex_unlock(&dev->struct_mutex); mutex_unlock(&dev->struct_mutex);
return ret; return 0;
} }
/** /**
......
...@@ -1474,7 +1474,7 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev, ...@@ -1474,7 +1474,7 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
* a fence as the cost is not that onerous. * a fence as the cost is not that onerous.
*/ */
if (obj->tiling_mode != I915_TILING_NONE) { if (obj->tiling_mode != I915_TILING_NONE) {
ret = i915_gem_object_get_fence_reg(obj, false); ret = i915_gem_object_get_fence(obj, pipelined, false);
if (ret) if (ret)
goto err_unpin; goto err_unpin;
} }
...@@ -4370,6 +4370,12 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, ...@@ -4370,6 +4370,12 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
/* we only need to pin inside GTT if cursor is non-phy */ /* we only need to pin inside GTT if cursor is non-phy */
mutex_lock(&dev->struct_mutex); mutex_lock(&dev->struct_mutex);
if (!dev_priv->info->cursor_needs_physical) { if (!dev_priv->info->cursor_needs_physical) {
if (obj->tiling_mode) {
DRM_ERROR("cursor cannot be tiled\n");
ret = -EINVAL;
goto fail_locked;
}
ret = i915_gem_object_pin(obj, PAGE_SIZE, true); ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
if (ret) { if (ret) {
DRM_ERROR("failed to pin cursor bo\n"); DRM_ERROR("failed to pin cursor bo\n");
...@@ -4382,6 +4388,12 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, ...@@ -4382,6 +4388,12 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
goto fail_unpin; goto fail_unpin;
} }
ret = i915_gem_object_put_fence(obj);
if (ret) {
DRM_ERROR("failed to move cursor bo into the GTT\n");
goto fail_unpin;
}
addr = obj->gtt_offset; addr = obj->gtt_offset;
} else { } else {
int align = IS_I830(dev) ? 16 * 1024 : 256; int align = IS_I830(dev) ? 16 * 1024 : 256;
...@@ -4966,6 +4978,7 @@ static void intel_unpin_work_fn(struct work_struct *__work) ...@@ -4966,6 +4978,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
i915_gem_object_unpin(work->old_fb_obj); i915_gem_object_unpin(work->old_fb_obj);
drm_gem_object_unreference(&work->pending_flip_obj->base); drm_gem_object_unreference(&work->pending_flip_obj->base);
drm_gem_object_unreference(&work->old_fb_obj->base); drm_gem_object_unreference(&work->old_fb_obj->base);
mutex_unlock(&work->dev->struct_mutex); mutex_unlock(&work->dev->struct_mutex);
kfree(work); kfree(work);
} }
...@@ -5009,10 +5022,12 @@ static void do_intel_finish_page_flip(struct drm_device *dev, ...@@ -5009,10 +5022,12 @@ static void do_intel_finish_page_flip(struct drm_device *dev,
spin_unlock_irqrestore(&dev->event_lock, flags); spin_unlock_irqrestore(&dev->event_lock, flags);
obj = work->old_fb_obj; obj = work->old_fb_obj;
atomic_clear_mask(1 << intel_crtc->plane, atomic_clear_mask(1 << intel_crtc->plane,
&obj->pending_flip.counter); &obj->pending_flip.counter);
if (atomic_read(&obj->pending_flip) == 0) if (atomic_read(&obj->pending_flip) == 0)
wake_up(&dev_priv->pending_flip_queue); wake_up(&dev_priv->pending_flip_queue);
schedule_work(&work->work); schedule_work(&work->work);
trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj); trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj);
......
...@@ -787,6 +787,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, ...@@ -787,6 +787,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
if (ret != 0) if (ret != 0)
goto out_unpin; goto out_unpin;
ret = i915_gem_object_put_fence(new_bo);
if (ret)
goto out_unpin;
if (!overlay->active) { if (!overlay->active) {
regs = intel_overlay_map_regs(overlay); regs = intel_overlay_map_regs(overlay);
if (!regs) { if (!regs) {
...@@ -1161,6 +1165,12 @@ int intel_overlay_put_image(struct drm_device *dev, void *data, ...@@ -1161,6 +1165,12 @@ int intel_overlay_put_image(struct drm_device *dev, void *data,
mutex_lock(&dev->mode_config.mutex); mutex_lock(&dev->mode_config.mutex);
mutex_lock(&dev->struct_mutex); mutex_lock(&dev->struct_mutex);
if (new_bo->tiling_mode) {
DRM_ERROR("buffer used for overlay image can not be tiled\n");
ret = -EINVAL;
goto out_unlock;
}
ret = intel_overlay_recover_from_interrupt(overlay, true); ret = intel_overlay_recover_from_interrupt(overlay, true);
if (ret != 0) if (ret != 0)
goto out_unlock; goto out_unlock;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment