Commit 16ffe73c authored by Chris Wilson's avatar Chris Wilson

drm/i915/pmu: Use GT parked for estimating RC6 while asleep

As we track when we put the GT device to sleep upon idling, we can use
that callback to sample the current rc6 counters and record the
timestamp for estimating samples after that point while asleep.

v2: Stick to using ktime_t
v3: Track user_wakerefs that interfere with the new
intel_gt_pm_wait_for_idle
v4: No need for parked/unparked estimation if !CONFIG_PM
v5: Keep timer park/unpark logic as was
v6: Refactor duplicated estimate/update rc6 logic
v7: Pull intel_get_pm_get_if_awake() out from the pmu->lock.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: default avatarTvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190912124813.19225-1-chris@chris-wilson.co.uk
parent 8d8b0031
...@@ -141,6 +141,24 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915) ...@@ -141,6 +141,24 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915)
return switch_to_kernel_context_sync(&i915->gt); return switch_to_kernel_context_sync(&i915->gt);
} }
static void user_forcewake(struct intel_gt *gt, bool suspend)
{
int count = atomic_read(&gt->user_wakeref);
/* Inside suspend/resume so single threaded, no races to worry about. */
if (likely(!count))
return;
intel_gt_pm_get(gt);
if (suspend) {
GEM_BUG_ON(count > atomic_read(&gt->wakeref.count));
atomic_sub(count, &gt->wakeref.count);
} else {
atomic_add(count, &gt->wakeref.count);
}
intel_gt_pm_put(gt);
}
void i915_gem_suspend(struct drm_i915_private *i915) void i915_gem_suspend(struct drm_i915_private *i915)
{ {
GEM_TRACE("\n"); GEM_TRACE("\n");
...@@ -148,6 +166,8 @@ void i915_gem_suspend(struct drm_i915_private *i915) ...@@ -148,6 +166,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0); intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
flush_workqueue(i915->wq); flush_workqueue(i915->wq);
user_forcewake(&i915->gt, true);
mutex_lock(&i915->drm.struct_mutex); mutex_lock(&i915->drm.struct_mutex);
/* /*
...@@ -259,6 +279,8 @@ void i915_gem_resume(struct drm_i915_private *i915) ...@@ -259,6 +279,8 @@ void i915_gem_resume(struct drm_i915_private *i915)
if (!i915_gem_load_power_context(i915)) if (!i915_gem_load_power_context(i915))
goto err_wedged; goto err_wedged;
user_forcewake(&i915->gt, false);
out_unlock: out_unlock:
intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
mutex_unlock(&i915->drm.struct_mutex); mutex_unlock(&i915->drm.struct_mutex);
......
...@@ -50,6 +50,7 @@ struct intel_gt { ...@@ -50,6 +50,7 @@ struct intel_gt {
} timelines; } timelines;
struct intel_wakeref wakeref; struct intel_wakeref wakeref;
atomic_t user_wakeref;
struct list_head closed_vma; struct list_head closed_vma;
spinlock_t closed_lock; /* guards the list of closed_vma */ spinlock_t closed_lock; /* guards the list of closed_vma */
......
...@@ -3995,13 +3995,12 @@ static int i915_sseu_status(struct seq_file *m, void *unused) ...@@ -3995,13 +3995,12 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
static int i915_forcewake_open(struct inode *inode, struct file *file) static int i915_forcewake_open(struct inode *inode, struct file *file)
{ {
struct drm_i915_private *i915 = inode->i_private; struct drm_i915_private *i915 = inode->i_private;
struct intel_gt *gt = &i915->gt;
if (INTEL_GEN(i915) < 6) atomic_inc(&gt->user_wakeref);
return 0; intel_gt_pm_get(gt);
if (INTEL_GEN(i915) >= 6)
file->private_data = intel_uncore_forcewake_user_get(gt->uncore);
(void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
intel_uncore_forcewake_user_get(&i915->uncore);
return 0; return 0;
} }
...@@ -4009,13 +4008,12 @@ static int i915_forcewake_open(struct inode *inode, struct file *file) ...@@ -4009,13 +4008,12 @@ static int i915_forcewake_open(struct inode *inode, struct file *file)
static int i915_forcewake_release(struct inode *inode, struct file *file) static int i915_forcewake_release(struct inode *inode, struct file *file)
{ {
struct drm_i915_private *i915 = inode->i_private; struct drm_i915_private *i915 = inode->i_private;
struct intel_gt *gt = &i915->gt;
if (INTEL_GEN(i915) < 6) if (INTEL_GEN(i915) >= 6)
return 0; intel_uncore_forcewake_user_put(&i915->uncore);
intel_gt_pm_put(gt);
intel_uncore_forcewake_user_put(&i915->uncore); atomic_dec(&gt->user_wakeref);
intel_runtime_pm_put(&i915->runtime_pm,
(intel_wakeref_t)(uintptr_t)file->private_data);
return 0; return 0;
} }
......
...@@ -116,22 +116,124 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) ...@@ -116,22 +116,124 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
return enable; return enable;
} }
void i915_pmu_gt_parked(struct drm_i915_private *i915) static u64 __get_rc6(const struct intel_gt *gt)
{ {
struct i915_pmu *pmu = &i915->pmu; struct drm_i915_private *i915 = gt->i915;
u64 val;
if (!pmu->base.event_init) val = intel_rc6_residency_ns(i915,
return; IS_VALLEYVIEW(i915) ?
VLV_GT_RENDER_RC6 :
GEN6_GT_GFX_RC6);
if (HAS_RC6p(i915))
val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
if (HAS_RC6pp(i915))
val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
return val;
}
#if IS_ENABLED(CONFIG_PM)
static inline s64 ktime_since(const ktime_t kt)
{
return ktime_to_ns(ktime_sub(ktime_get(), kt));
}
static u64 __pmu_estimate_rc6(struct i915_pmu *pmu)
{
u64 val;
spin_lock_irq(&pmu->lock);
/* /*
* Signal sampling timer to stop if only engine events are enabled and * We think we are runtime suspended.
* GPU went idle. *
* Report the delta from when the device was suspended to now,
* on top of the last known real value, as the approximated RC6
* counter value.
*/ */
pmu->timer_enabled = pmu_needs_timer(pmu, false); val = ktime_since(pmu->sleep_last);
spin_unlock_irq(&pmu->lock); val += pmu->sample[__I915_SAMPLE_RC6].cur;
pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
return val;
}
static u64 __pmu_update_rc6(struct i915_pmu *pmu, u64 val)
{
/*
* If we are coming back from being runtime suspended we must
* be careful not to report a larger value than returned
* previously.
*/
if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
pmu->sample[__I915_SAMPLE_RC6].cur = val;
} else {
val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
}
return val;
}
static u64 get_rc6(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
struct i915_pmu *pmu = &i915->pmu;
unsigned long flags;
u64 val;
val = 0;
if (intel_gt_pm_get_if_awake(gt)) {
val = __get_rc6(gt);
intel_gt_pm_put(gt);
}
spin_lock_irqsave(&pmu->lock, flags);
if (val)
val = __pmu_update_rc6(pmu, val);
else
val = __pmu_estimate_rc6(pmu);
spin_unlock_irqrestore(&pmu->lock, flags);
return val;
}
static void park_rc6(struct drm_i915_private *i915)
{
struct i915_pmu *pmu = &i915->pmu;
if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
__pmu_update_rc6(pmu, __get_rc6(&i915->gt));
pmu->sleep_last = ktime_get();
}
static void unpark_rc6(struct drm_i915_private *i915)
{
struct i915_pmu *pmu = &i915->pmu;
/* Estimate how long we slept and accumulate that into rc6 counters */
if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
__pmu_estimate_rc6(pmu);
}
#else
static u64 get_rc6(struct intel_gt *gt)
{
return __get_rc6(gt);
} }
static void park_rc6(struct drm_i915_private *i915) {}
static void unpark_rc6(struct drm_i915_private *i915) {}
#endif
static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu) static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
{ {
if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) { if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
...@@ -143,6 +245,26 @@ static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu) ...@@ -143,6 +245,26 @@ static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
} }
} }
void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
struct i915_pmu *pmu = &i915->pmu;
if (!pmu->base.event_init)
return;
spin_lock_irq(&pmu->lock);
park_rc6(i915);
/*
* Signal sampling timer to stop if only engine events are enabled and
* GPU went idle.
*/
pmu->timer_enabled = pmu_needs_timer(pmu, false);
spin_unlock_irq(&pmu->lock);
}
void i915_pmu_gt_unparked(struct drm_i915_private *i915) void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{ {
struct i915_pmu *pmu = &i915->pmu; struct i915_pmu *pmu = &i915->pmu;
...@@ -151,10 +273,14 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915) ...@@ -151,10 +273,14 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
return; return;
spin_lock_irq(&pmu->lock); spin_lock_irq(&pmu->lock);
/* /*
* Re-enable sampling timer when GPU goes active. * Re-enable sampling timer when GPU goes active.
*/ */
__i915_pmu_maybe_start_timer(pmu); __i915_pmu_maybe_start_timer(pmu);
unpark_rc6(i915);
spin_unlock_irq(&pmu->lock); spin_unlock_irq(&pmu->lock);
} }
...@@ -430,104 +556,6 @@ static int i915_pmu_event_init(struct perf_event *event) ...@@ -430,104 +556,6 @@ static int i915_pmu_event_init(struct perf_event *event)
return 0; return 0;
} }
static u64 __get_rc6(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
u64 val;
val = intel_rc6_residency_ns(i915,
IS_VALLEYVIEW(i915) ?
VLV_GT_RENDER_RC6 :
GEN6_GT_GFX_RC6);
if (HAS_RC6p(i915))
val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
if (HAS_RC6pp(i915))
val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
return val;
}
static u64 get_rc6(struct intel_gt *gt)
{
#if IS_ENABLED(CONFIG_PM)
struct drm_i915_private *i915 = gt->i915;
struct intel_runtime_pm *rpm = &i915->runtime_pm;
struct i915_pmu *pmu = &i915->pmu;
intel_wakeref_t wakeref;
unsigned long flags;
u64 val;
wakeref = intel_runtime_pm_get_if_in_use(rpm);
if (wakeref) {
val = __get_rc6(gt);
intel_runtime_pm_put(rpm, wakeref);
/*
* If we are coming back from being runtime suspended we must
* be careful not to report a larger value than returned
* previously.
*/
spin_lock_irqsave(&pmu->lock, flags);
if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
pmu->sample[__I915_SAMPLE_RC6].cur = val;
} else {
val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
}
spin_unlock_irqrestore(&pmu->lock, flags);
} else {
struct device *kdev = rpm->kdev;
/*
* We are runtime suspended.
*
* Report the delta from when the device was suspended to now,
* on top of the last known real value, as the approximated RC6
* counter value.
*/
spin_lock_irqsave(&pmu->lock, flags);
/*
* After the above branch intel_runtime_pm_get_if_in_use failed
* to get the runtime PM reference we cannot assume we are in
* runtime suspend since we can either: a) race with coming out
* of it before we took the power.lock, or b) there are other
* states than suspended which can bring us here.
*
* We need to double-check that we are indeed currently runtime
* suspended and if not we cannot do better than report the last
* known RC6 value.
*/
if (pm_runtime_status_suspended(kdev)) {
val = pm_runtime_suspended_time(kdev);
if (!pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
pmu->suspended_time_last = val;
val -= pmu->suspended_time_last;
val += pmu->sample[__I915_SAMPLE_RC6].cur;
pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
} else if (pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
} else {
val = pmu->sample[__I915_SAMPLE_RC6].cur;
}
spin_unlock_irqrestore(&pmu->lock, flags);
}
return val;
#else
return __get_rc6(gt);
#endif
}
static u64 __i915_pmu_event_read(struct perf_event *event) static u64 __i915_pmu_event_read(struct perf_event *event)
{ {
struct drm_i915_private *i915 = struct drm_i915_private *i915 =
......
...@@ -97,9 +97,9 @@ struct i915_pmu { ...@@ -97,9 +97,9 @@ struct i915_pmu {
*/ */
struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS]; struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
/** /**
* @suspended_time_last: Cached suspend time from PM core. * @sleep_last: Last time GT parked for RC6 estimation.
*/ */
u64 suspended_time_last; ktime_t sleep_last;
/** /**
* @i915_attr: Memory block holding device attributes. * @i915_attr: Memory block holding device attributes.
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment