Commit 74d290f8 authored by Chris Wilson

drm/i915: Boost GPU clocks if we miss the pageflip's vblank

If we miss the current vblank because the GPU was busy, that may cause
jitter as the frame rate temporarily drops. We try to limit the impact
of this by then boosting the GPU clock to deliver the frame as quickly
as possible. This was originally done in commit 6ad790c0 ("drm/i915:
Boost GPU frequency if we detect outstanding pageflips"), but it was
never forward ported to the atomic paths and was finally dropped in
commit fd3a4024 ("drm/i915: Rip out legacy page_flip completion/irq
handling").

One of the most typical use-cases for this is a mostly idle desktop.
Rendering one frame of the desktop's frontbuffer can easily be
accomplished by the GPU running at low frequency, but it often exceeds
the time budget of the desktop compositor. The result is that animations
such as opening the menu, doing a fullscreen switch, or even just trying
to move a window around are slow and jerky. Ideally we would respond
within a frame to give the best impression of a smooth UX; as a
compromise, we instead respond once that first frame misses its goal.
The result should be a near-imperceptible initial delay and a smooth
animation even when starting from idle. The cost, as ever, is that we
spend more power than is strictly necessary, as we overestimate the
required GPU frequency and then try to ramp down.

This is of course purely reactive, too little, too late; nevertheless it
is surprisingly effective.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102199
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170817123706.6777-1-chris@chris-wilson.co.uk
Tested-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Radoslaw Szwichtenberg <radoslaw.szwichtenberg@intel.com>
parent 5f88a9c6
@@ -12636,6 +12636,58 @@ static const struct drm_crtc_funcs intel_crtc_funcs = {
 	.set_crc_source = intel_crtc_set_crc_source,
 };
 
+struct wait_rps_boost {
+	struct wait_queue_entry wait;
+
+	struct drm_crtc *crtc;
+	struct drm_i915_gem_request *request;
+};
+
+static int do_rps_boost(struct wait_queue_entry *_wait,
+			unsigned mode, int sync, void *key)
+{
+	struct wait_rps_boost *wait = container_of(_wait, typeof(*wait), wait);
+	struct drm_i915_gem_request *rq = wait->request;
+
+	gen6_rps_boost(rq, NULL);
+	i915_gem_request_put(rq);
+
+	drm_crtc_vblank_put(wait->crtc);
+
+	list_del(&wait->wait.entry);
+	kfree(wait);
+	return 1;
+}
+
+static void add_rps_boost_after_vblank(struct drm_crtc *crtc,
+				       struct dma_fence *fence)
+{
+	struct wait_rps_boost *wait;
+
+	if (!dma_fence_is_i915(fence))
+		return;
+
+	if (INTEL_GEN(to_i915(crtc->dev)) < 6)
+		return;
+
+	if (drm_crtc_vblank_get(crtc))
+		return;
+
+	wait = kmalloc(sizeof(*wait), GFP_KERNEL);
+	if (!wait) {
+		drm_crtc_vblank_put(crtc);
+		return;
+	}
+
+	wait->request = to_request(dma_fence_get(fence));
+	wait->crtc = crtc;
+
+	wait->wait.func = do_rps_boost;
+	wait->wait.flags = 0;
+	add_wait_queue(drm_crtc_vblank_waitqueue(crtc), &wait->wait);
+}
+
 /**
  * intel_prepare_plane_fb - Prepare fb for usage on plane
  * @plane: drm plane to prepare for
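The new helpers ride on the kernel's generic waitqueue callback mechanism
rather than on a dedicated work item: every entry on a wait_queue_head
carries a func callback that the waker invokes, and embedding the
wait_queue_entry inside a larger structure lets that callback recover its
private data with container_of(). A minimal, stand-alone sketch of that
pattern (hypothetical names my_waiter, my_wake_fn and arm_waiter; not the
driver code itself):

#include <linux/wait.h>
#include <linux/slab.h>

/* Hypothetical illustration of an embedded wait_queue_entry with a
 * custom wake callback; the i915 helpers above follow the same shape.
 */
struct my_waiter {
	struct wait_queue_entry wait;	/* linked onto the waitqueue */
	void *data;			/* whatever the callback needs later */
};

/* Invoked by the wake-up path for each queued entry, in the waker's context. */
static int my_wake_fn(struct wait_queue_entry *entry,
		      unsigned mode, int sync, void *key)
{
	struct my_waiter *w = container_of(entry, struct my_waiter, wait);

	/* one-shot: do the work, then unlink and free the entry */
	list_del(&w->wait.entry);
	kfree(w);
	return 1;
}

static int arm_waiter(struct wait_queue_head *wq, void *data)
{
	struct my_waiter *w = kmalloc(sizeof(*w), GFP_KERNEL);

	if (!w)
		return -ENOMEM;

	w->data = data;
	w->wait.func = my_wake_fn;
	w->wait.flags = 0;
	add_wait_queue(wq, &w->wait);	/* fires on the next wake_up(wq) */
	return 0;
}

In the patch, the waitqueue is the CRTC's vblank waitqueue, the wake-up
comes from the vblank interrupt, and the callback boosts and releases the
request before freeing itself.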
@@ -12733,12 +12785,22 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 		return ret;
 
 	if (!new_state->fence) { /* implicit fencing */
+		struct dma_fence *fence;
+
 		ret = i915_sw_fence_await_reservation(&intel_state->commit_ready,
 						      obj->resv, NULL,
 						      false, I915_FENCE_TIMEOUT,
 						      GFP_KERNEL);
 		if (ret < 0)
 			return ret;
+
+		fence = reservation_object_get_excl_rcu(obj->resv);
+		if (fence) {
+			add_rps_boost_after_vblank(new_state->crtc, fence);
+			dma_fence_put(fence);
+		}
+	} else {
+		add_rps_boost_after_vblank(new_state->crtc, new_state->fence);
 	}
 
 	return 0;
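The two fencing branches differ in where the fence comes from and who owns
the reference: for implicit fencing the exclusive (write) fence is looked
up in the object's reservation object, and reservation_object_get_excl_rcu()
returns its own reference that must be balanced with dma_fence_put(); for
explicit fencing new_state->fence is used directly and
add_rps_boost_after_vblank() takes the reference it needs via
dma_fence_get(). A rough, stand-alone sketch of the implicit-fencing side
(hypothetical helper name with_excl_fence, not part of the driver):

#include <linux/dma-fence.h>
#include <linux/reservation.h>

/* Hypothetical helper: run a callback on an object's exclusive fence.
 * reservation_object_get_excl_rcu() returns NULL or a new reference,
 * so the caller must drop it again with dma_fence_put().
 */
static void with_excl_fence(struct reservation_object *resv,
			    void (*fn)(struct dma_fence *fence, void *data),
			    void *data)
{
	struct dma_fence *fence = reservation_object_get_excl_rcu(resv);

	if (!fence)
		return;

	fn(fence, data);
	dma_fence_put(fence);
}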
......
@@ -1849,7 +1849,6 @@ void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
 void gen6_rps_boost(struct drm_i915_gem_request *rq,
 		    struct intel_rps_client *rps);
-void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req);
 void g4x_wm_get_hw_state(struct drm_device *dev);
 void vlv_wm_get_hw_state(struct drm_device *dev);
 void ilk_wm_get_hw_state(struct drm_device *dev);
......
@@ -6169,6 +6169,7 @@ void gen6_rps_boost(struct drm_i915_gem_request *rq,
 		    struct intel_rps_client *rps)
 {
 	struct drm_i915_private *i915 = rq->i915;
+	unsigned long flags;
 	bool boost;
 
 	/* This is intentionally racy! We peek at the state here, then
@@ -6178,13 +6179,13 @@ void gen6_rps_boost(struct drm_i915_gem_request *rq,
 		return;
 
 	boost = false;
-	spin_lock_irq(&rq->lock);
+	spin_lock_irqsave(&rq->lock, flags);
 	if (!rq->waitboost && !i915_gem_request_completed(rq)) {
 		atomic_inc(&i915->rps.num_waiters);
 		rq->waitboost = true;
 		boost = true;
 	}
-	spin_unlock_irq(&rq->lock);
+	spin_unlock_irqrestore(&rq->lock, flags);
 	if (!boost)
 		return;
 
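The spin_lock_irq() to spin_lock_irqsave() change is needed because
gen6_rps_boost() can now be reached from do_rps_boost(), i.e. from the
vblank waitqueue wake-up that runs out of the vblank interrupt handler.
spin_unlock_irq() would unconditionally re-enable interrupts on unlock,
which is wrong when the caller already runs with them disabled; the
irqsave/irqrestore pair preserves whatever IRQ state the caller had. A
tiny sketch of the pattern (hypothetical helper, not driver code):

#include <linux/spinlock.h>
#include <linux/types.h>

/* Hypothetical helper callable from both process and interrupt context:
 * save and restore the caller's IRQ state around the lock instead of
 * force-enabling interrupts on unlock.
 */
static void set_flag_any_context(spinlock_t *lock, bool *flag)
{
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	*flag = true;
	spin_unlock_irqrestore(lock, flags);
}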
@@ -9155,43 +9156,6 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
 	return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
 }
 
-struct request_boost {
-	struct work_struct work;
-	struct drm_i915_gem_request *req;
-};
-
-static void __intel_rps_boost_work(struct work_struct *work)
-{
-	struct request_boost *boost = container_of(work, struct request_boost, work);
-	struct drm_i915_gem_request *req = boost->req;
-
-	if (!i915_gem_request_completed(req))
-		gen6_rps_boost(req, NULL);
-
-	i915_gem_request_put(req);
-	kfree(boost);
-}
-
-void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req)
-{
-	struct request_boost *boost;
-
-	if (req == NULL || INTEL_GEN(req->i915) < 6)
-		return;
-
-	if (i915_gem_request_completed(req))
-		return;
-
-	boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
-	if (boost == NULL)
-		return;
-
-	boost->req = i915_gem_request_get(req);
-
-	INIT_WORK(&boost->work, __intel_rps_boost_work);
-	queue_work(req->i915->wq, &boost->work);
-}
-
 void intel_pm_setup(struct drm_i915_private *dev_priv)
 {
 	mutex_init(&dev_priv->rps.hw_lock);
......