Commit abd50713 authored by Peter Zijlstra, committed by Ingo Molnar

perf: Reimplement frequency driven sampling

There was a bug in the old period code that caused intel_pmu_enable_all()
or native_write_msr_safe() to show up quite high in the profiles.

Staring at that code made my head hurt, so I rewrote it in a
hopefully simpler fashion. It's now fully symmetric between tick- and
overflow-driven adjustments, and uses less data to boot.

The only complication is that it basically wants to do a u128 division.
The code approximates that in a rather simple truncate-until-it-fits
fashion, taking care to keep the terms balanced while truncating.
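
As a rough illustration (not part of the patch), the truncate-until-it-fits
idea can be sketched in plain userspace C. The names fls_u64 and
approx_period are made up for this example, and the reduction loop is a
deliberately simplified, less accurate variant of the REDUCE_FLS scheme
used in the patch below:

	#include <stdint.h>
	#include <stdio.h>

	/* Number of bits needed to represent x (0 for x == 0). */
	static int fls_u64(uint64_t x)
	{
		int bits = 0;

		while (x) {
			x >>= 1;
			bits++;
		}
		return bits;
	}

	/*
	 * Approximate period = (count * 10^9) / (nsec * freq) without
	 * 128-bit arithmetic: drop one bit from the larger factor of each
	 * product until both products are known to fit in 64 bits.
	 */
	static uint64_t approx_period(uint64_t count, uint64_t nsec,
				      uint64_t freq)
	{
		uint64_t sec = 1000000000ULL;	/* nanoseconds per second */

		while (fls_u64(count) + fls_u64(sec) > 64 ||
		       fls_u64(nsec) + fls_u64(freq) > 64) {
			if (fls_u64(count) > fls_u64(sec))
				count >>= 1;
			else
				sec >>= 1;
			if (fls_u64(nsec) > fls_u64(freq))
				nsec >>= 1;
			else
				freq >>= 1;
		}

		if (nsec * freq == 0)
			return 0;

		return (count * sec) / (nsec * freq);
	}

	int main(void)
	{
		/*
		 * Example: 4,000,000 events counted in one 4ms tick with a
		 * requested rate of 1000 samples/sec gives
		 * 4e6 * 1e9 / (4e6 * 1e3) = 1,000,000 events per sample.
		 */
		printf("period = %llu\n",
		       (unsigned long long)approx_period(4000000, 4000000, 1000));
		return 0;
	}

The REDUCE_FLS version in the patch truncates the two products in a more
balanced way, so fewer significant bits are lost than in this sketch.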

This version does not generate that sampling artefact.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent ef12a141
@@ -498,9 +498,8 @@ struct hw_perf_event {
 	atomic64_t	period_left;
 	u64		interrupts;
-	u64		freq_count;
-	u64		freq_interrupts;
-	u64		freq_stamp;
+	u64		freq_time_stamp;
+	u64		freq_count_stamp;
 #endif
 };
@@ -1423,14 +1423,83 @@ void perf_event_task_sched_in(struct task_struct *task)

 static void perf_log_throttle(struct perf_event *event, int enable);

-static void perf_adjust_period(struct perf_event *event, u64 events)
+static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
+{
+	u64 frequency = event->attr.sample_freq;
+	u64 sec = NSEC_PER_SEC;
+	u64 divisor, dividend;
+
+	int count_fls, nsec_fls, frequency_fls, sec_fls;
+
+	count_fls = fls64(count);
+	nsec_fls = fls64(nsec);
+	frequency_fls = fls64(frequency);
+	sec_fls = 30;
+
+	/*
+	 * We got @count in @nsec, with a target of sample_freq HZ
+	 * the target period becomes:
+	 *
+	 *             @count * 10^9
+	 * period = -------------------
+	 *          @nsec * sample_freq
+	 *
+	 */
+
+	/*
+	 * Reduce accuracy by one bit such that @a and @b converge
+	 * to a similar magnitude.
+	 */
+#define REDUCE_FLS(a, b)		\
+do {					\
+	if (a##_fls > b##_fls) {	\
+		a >>= 1;		\
+		a##_fls--;		\
+	} else {			\
+		b >>= 1;		\
+		b##_fls--;		\
+	}				\
+} while (0)
+
+	/*
+	 * Reduce accuracy until either term fits in a u64, then proceed with
+	 * the other, so that finally we can do a u64/u64 division.
+	 */
+	while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
+		REDUCE_FLS(nsec, frequency);
+		REDUCE_FLS(sec, count);
+	}
+
+	if (count_fls + sec_fls > 64) {
+		divisor = nsec * frequency;
+
+		while (count_fls + sec_fls > 64) {
+			REDUCE_FLS(count, sec);
+			divisor >>= 1;
+		}
+
+		dividend = count * sec;
+	} else {
+		dividend = count * sec;
+
+		while (nsec_fls + frequency_fls > 64) {
+			REDUCE_FLS(nsec, frequency);
+			dividend >>= 1;
+		}
+
+		divisor = nsec * frequency;
+	}
+
+	return div64_u64(dividend, divisor);
+}
+
+static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	u64 period, sample_period;
 	s64 delta;

-	events *= hwc->sample_period;
-	period = div64_u64(events, event->attr.sample_freq);
+	period = perf_calculate_period(event, nsec, count);

 	delta = (s64)(period - hwc->sample_period);
 	delta = (delta + 7) / 8; /* low pass filter */
@@ -1441,13 +1510,22 @@ static void perf_adjust_period(struct perf_event *event, u64 events)
 		sample_period = 1;

 	hwc->sample_period = sample_period;
+
+	if (atomic64_read(&hwc->period_left) > 8*sample_period) {
+		perf_disable();
+		event->pmu->disable(event);
+		atomic64_set(&hwc->period_left, 0);
+		event->pmu->enable(event);
+		perf_enable();
+	}
 }

 static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 {
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
-	u64 interrupts, freq;
+	u64 interrupts, now;
+	s64 delta;

 	raw_spin_lock(&ctx->lock);
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
@@ -1468,44 +1546,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 		if (interrupts == MAX_INTERRUPTS) {
 			perf_log_throttle(event, 1);
 			event->pmu->unthrottle(event);
-			interrupts = 2*sysctl_perf_event_sample_rate/HZ;
 		}

 		if (!event->attr.freq || !event->attr.sample_freq)
 			continue;

-		/*
-		 * if the specified freq < HZ then we need to skip ticks
-		 */
-		if (event->attr.sample_freq < HZ) {
-			freq = event->attr.sample_freq;
-
-			hwc->freq_count += freq;
-			hwc->freq_interrupts += interrupts;
-
-			if (hwc->freq_count < HZ)
-				continue;
-
-			interrupts = hwc->freq_interrupts;
-			hwc->freq_interrupts = 0;
-			hwc->freq_count -= HZ;
-		} else
-			freq = HZ;
-
-		perf_adjust_period(event, freq * interrupts);
-
-		/*
-		 * In order to avoid being stalled by an (accidental) huge
-		 * sample period, force reset the sample period if we didn't
-		 * get any events in this freq period.
-		 */
-		if (!interrupts) {
-			perf_disable();
-			event->pmu->disable(event);
-			atomic64_set(&hwc->period_left, 0);
-			event->pmu->enable(event);
-			perf_enable();
-		}
+		event->pmu->read(event);
+		now = atomic64_read(&event->count);
+		delta = now - hwc->freq_count_stamp;
+		hwc->freq_count_stamp = now;
+
+		if (delta > 0)
+			perf_adjust_period(event, TICK_NSEC, delta);
 	}

 	raw_spin_unlock(&ctx->lock);
 }
@@ -3768,12 +3820,12 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 	if (event->attr.freq) {
 		u64 now = perf_clock();
-		s64 delta = now - hwc->freq_stamp;
+		s64 delta = now - hwc->freq_time_stamp;

-		hwc->freq_stamp = now;
+		hwc->freq_time_stamp = now;

-		if (delta > 0 && delta < TICK_NSEC)
-			perf_adjust_period(event, NSEC_PER_SEC / (int)delta);
+		if (delta > 0 && delta < 2*TICK_NSEC)
+			perf_adjust_period(event, delta, hwc->last_period);
 	}

 	/*