Commit bd46a4f1 authored by Christoph Lameter, committed by Linus Torvalds

[PATCH] Time interpolator: Scalability enhancements and high resolution time for IA64

This has been in the ia64 (and hence -mm) trees for a couple of months.

Changelog:
 * Affects only architectures which define CONFIG_TIME_INTERPOLATION
   (currently only IA64)
 * Genericize time interpolation, make time interpolators easily usable
   and provide instructions on how to use the interpolator for other
   architectures.
 * Provide nanosecond resolution for clock_gettime and an accuracy
   up to the time interpolator time base.
 * clock_getres() reports the resolution of the underlying time base,
   which is typically <50ns and may be 1ns on some systems.
 * Make time interpolator self-tuning to limit time jumps
   and to make the interpolators work correctly on systems with
   broken time base specifications.
 * SMP scalability: Make clock_gettime and gettimeofday scale O(1)
   by removing the cmpxchg for most clocks (tested for up to 512 CPUs)
 * IA64: provide asm fastcall that doubles the performance
   of gettimeofday and clock_gettime on SGI and other IA64 systems
   (asm fastcalls scale O(1) together with the scalability fixes).
 * IA64: provide nojitter kernel option so that IA64 systems with
   correctly synchronized ITC counters may also enjoy the
   scalability enhancements.
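
(The dmt and todscale tools used in the measurements below come from SGI's
internal noship-tests suite and are not part of this patch. For orientation,
a minimal sketch of a comparable single-call latency probe follows; the
structure and the fixed ten-iteration loop are illustrative assumptions,
not the real tool.)

#include <stdio.h>
#include <time.h>
#include <sys/time.h>

/* Read the IA64 ITC cycle counter (the same time base the kernel uses). */
static inline unsigned long itc(void)
{
	unsigned long t;
	asm volatile ("mov %0=ar.itc" : "=r" (t));
	return t;
}

int main(void)
{
	struct timeval tv;
	struct timespec ts;
	unsigned long t0;
	int i;

	printf("gettimeofday cycles:");
	for (i = 0; i < 10; i++) {		/* first iteration is cold-cache */
		t0 = itc();
		gettimeofday(&tv, NULL);
		printf(" %lu", itc() - t0);
	}
	printf("\nclock_gettime(REAL) cycles:");
	for (i = 0; i < 10; i++) {
		t0 = itc();
		clock_gettime(CLOCK_REALTIME, &ts);
		printf(" %lu", itc() - t0);
	}
	printf("\n");
	return 0;
}

(Link with -lrt for clock_gettime on 2.6-era glibc.)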

Performance measurements for single calls (ITC cycles):

A. 4-way Intel IA64 SMP system (kmart)

ITC offsets:
kmart:/usr/src/noship-tests # dmesg|grep synchr
CPU 1: synchronized ITC with CPU 0 (last diff 1 cycles, maxerr 417 cycles)
CPU 2: synchronized ITC with CPU 0 (last diff 2 cycles, maxerr 417 cycles)
CPU 3: synchronized ITC with CPU 0 (last diff 1 cycles, maxerr 417 cycles)

A.1. Current kernel code

kmart:/usr/src/noship-tests # ./dmt
gettimeofday cycles: 3737 220 215 215 215 215 215 215 215 215
clock_gettime(REAL) cycles: 4058 575 564 576 565 566 558 558 558 558
clock_gettime(MONO) cycles: 1583 621 609 609 609 609 609 609 609 609
clock_gettime(PROCESS) cycles: 71428 298 259 259 259 259 259 259 259 259
clock_gettime(THREAD) cycles: 3982 336 290 298 298 298 298 286 286 286

A.2 New code using cmpxchg

kmart:/usr/src/noship-tests # ./dmt
gettimeofday cycles: 3145 213 216 213 213 213 213 213 213 213
clock_gettime(REAL) cycles: 3185 230 210 210 210 210 210 210 210 210
clock_gettime(MONO) cycles: 284 217 217 216 216 216 216 216 216 216
clock_gettime(PROCESS) cycles: 68857 289 270 259 259 259 259 259 259 259
clock_gettime(THREAD) cycles: 3862 339 298 298 298 298 290 286 286 286

A.3 New code with cmpxchg switched off (nojitter kernel option)

kmart:/usr/src/noship-tests # ./dmt
gettimeofday cycles: 3195 219 219 212 212 212 212 212 212 212
clock_gettime(REAL) cycles: 3003 228 205 205 205 205 205 205 205 205
clock_gettime(MONO) cycles: 279 209 209 209 208 208 208 208 208 208
clock_gettime(PROCESS) cycles: 65849 292 259 259 268 270 270 259 259 259

B. SGI SN2 system running 512 IA64 CPUs.

B.1. Current kernel code

[root@ascender noship-tests]# ./dmt
gettimeofday cycles: 17221 1028 1007 1004 1004 1004 1010 25928 1002 1003
clock_gettime(REAL) cycles: 10388 1099 1055 1044 1064 1063 1051 1056 1061 1056
clock_gettime(MONO) cycles: 2363 96 96 96 96 96 96 96 96 96
clock_gettime(PROCESS) cycles: 46537 804 660 666 666 666 666 666 666 666
clock_gettime(THREAD) cycles: 10945 727 710 684 685 686 685 686 685 686

B.2 New code

ascender:~/noship-tests # ./dmt
gettimeofday cycles: 3874 610 588 588 588 588 588 588 588 588
clock_gettime(REAL) cycles: 3893 612 588 582 588 588 588 588 588 588
clock_gettime(MONO) cycles: 686 595 595 588 588 588 588 588 588 588
clock_gettime(PROCESS) cycles: 290759 322 269 269 259 265 265 265 259 259
clock_gettime(THREAD) cycles: 5153 358 306 298 296 304 290 298 298 298

Scalability of time functions (in time it takes to do a million calls):
=======================================================================

A. 4-way Intel IA64 SMP system (kmart)
A.1 Current code

kmart:/usr/src/noship-tests # ./todscale -n1000000
 CPUS       WALL  WALL/CPUS
    1      0.192      0.192
    2      1.125      0.563
    4      9.229      2.307

A.2 New code using cmpxchg

kmart:/usr/src/noship-tests # ./todscale
 CPUS       WALL  WALL/CPUS
    1      0.188      0.188
    2      0.457      0.229
    4      0.413      0.103

(the 4-CPU measurement occasionally fluctuates, sometimes up to ~15s,
for reasons not yet understood)

A.3 New code without cmpxchg (nojitter kernel option)

kmart:/usr/src/noship-tests # ./todscale -n10000000
 CPUS       WALL  WALL/CPUS
    1      0.180      0.180
    2      0.180      0.090
    4      0.252      0.063

B. SGI SN2 system running 512 IA64 CPUs.

The system has a global monotonic clock and therefore has
no need for compensation. Current code uses a cmpxchg. New
code has no cmpxchg.
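
(The cmpxchg in question is the jitter guard in the counter read path; the
full version is in the kernel/timer.c hunk below. A simplified C sketch of
the logic, with the helper name borrowed from that hunk:)

static unsigned long jitter_safe_counter(struct time_interpolator *ti)
{
	unsigned long lcycle, now;

	do {
		lcycle = ti->last_cycle;
		now = time_interpolator_get_cycles(ti->source);
		/* A reader on a CPU whose ITC lags must never return an
		 * earlier value than one already handed out: reuse the
		 * newest value seen so far.
		 */
		if (lcycle && time_after(lcycle, now))
			return lcycle;
		/* Publish the newer value; this contended cmpxchg is the
		 * scalability cost that "nojitter" (or a globally
		 * synchronized clock, as on SN2) avoids.
		 */
	} while (cmpxchg(&ti->last_cycle, lcycle, now) != lcycle);
	return now;
}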

B.1 current code

ascender:~/noship-tests # ./todscale
 CPUS       WALL  WALL/CPUS
    1      0.850      0.850
    2      1.767      0.884
    4      6.124      1.531
    8     20.777      2.597
   16     57.693      3.606
   32    164.688      5.146
   64    456.647      7.135
  128   1093.371      8.542
  256   2778.257     10.853
(System crash at 512 CPUs)

B.2 New code

ascender:~/noship-tests # ./todscale -n1000000
 CPUS       WALL  WALL/CPUS
    1      0.426      0.426
    2      0.429      0.215
    4      0.436      0.109
    8      0.452      0.057
   16      0.454      0.028
   32      0.457      0.014
   64      0.459      0.007
  128      0.466      0.004
  256      0.474      0.002
  512      0.518      0.001
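
(Each todscale run above reports the wall-clock time for the given number of
gettimeofday() calls spread across N CPUs. A minimal sketch of such a scaling
test follows; the fork-per-CPU structure is an illustrative assumption about
how the tool works, not its actual source.)

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	int cpus = argc > 1 ? atoi(argv[1]) : 4;
	long i, n = 1000000;		/* calls per worker, like -n1000000 */
	struct timeval start, end, tv;
	double wall;
	int c;

	gettimeofday(&start, NULL);
	for (c = 0; c < cpus; c++)
		if (fork() == 0) {	/* one worker per CPU */
			for (i = 0; i < n; i++)
				gettimeofday(&tv, NULL);
			_exit(0);
		}
	while (wait(NULL) > 0)
		;			/* collect all workers */
	gettimeofday(&end, NULL);

	wall = (end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec) / 1e6;
	printf("%5d %10.3f %10.3f\n", cpus, wall, wall / cpus);
	return 0;
}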

Clock Accuracy
==============
A. 4-CPU SMP system

A.1 Old code

kmart:/usr/src/noship-tests # ./cdisp
          Gettimeofday() = 1092124757.270305000
           CLOCK_REALTIME= 1092124757.270382000 resolution= 0.000976563
          CLOCK_MONOTONIC=         89.696726590 resolution= 0.000976563
 CLOCK_PROCESS_CPUTIME_ID=          0.001242507 resolution= 0.000000001
  CLOCK_THREAD_CPUTIME_ID=          0.001255310 resolution= 0.000000001

A.2 New code

kmart:/usr/src/noship-tests # ./cdisp
          Gettimeofday() = 1092124478.194530000
           CLOCK_REALTIME= 1092124478.194603399 resolution= 0.000000001
          CLOCK_MONOTONIC=         88.198315204 resolution= 0.000000001
 CLOCK_PROCESS_CPUTIME_ID=          0.001241235 resolution= 0.000000001
  CLOCK_THREAD_CPUTIME_ID=          0.001254747 resolution= 0.000000001
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent c9daeae6
Time Interpolators
------------------

Time interpolators provide a base for time calculation between timer ticks
and allow an accurate determination of time down to the accuracy of the
time source in nanoseconds.

The architecture specific code typically provides gettimeofday and
settimeofday under Linux. The time interpolator provides both if an arch
defines CONFIG_TIME_INTERPOLATION. The arch still must set up timer tick
operations and call the necessary functions to advance the clock.

With the time interpolator a standardized interface exists for time
interpolation between ticks, which also allows the determination of time
in a hardware independent way. The provided logic is highly scalable and
has been tested in SMP situations of up to 512 CPUs.

If CONFIG_TIME_INTERPOLATION is defined, then the architecture specific
code (or the device drivers - like HPET) must register time interpolators.
These are typically defined in the following way:

static struct time_interpolator my_interpolator;

void time_init(void)
{
	....
	/* Initialization of the timer */
	my_interpolator.frequency = MY_FREQUENCY;
	my_interpolator.source = TIME_SOURCE_MMIO32;
	my_interpolator.addr = &my_timer;
	my_interpolator.shift = 32;	/* increase accuracy of scaling */
	my_interpolator.drift = -1;	/* Unknown */
	my_interpolator.jitter = 0;	/* A stable time source */
	register_time_interpolator(&my_interpolator);
	....
}
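
Once an interpolator is registered, the improved resolution is directly
visible from user space (this mirrors the cdisp output in the changelog;
a minimal sketch using only standard POSIX calls):

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec res, now;

	/* With an interpolator registered this reports the time source
	 * resolution (often 1ns) instead of the ~1/HZ tick resolution. */
	clock_getres(CLOCK_REALTIME, &res);
	clock_gettime(CLOCK_REALTIME, &now);
	printf("CLOCK_REALTIME= %ld.%09ld resolution= %ld.%09ld\n",
	       (long) now.tv_sec, now.tv_nsec, (long) res.tv_sec, res.tv_nsec);
	return 0;
}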
For more details see include/linux/timex.h.
Christoph Lameter <christoph@lameter.com>, September 8, 2004
arch/ia64/kernel/asm-offsets.c

@@ -188,9 +188,6 @@ void foo(void)
 	DEFINE(IA64_CLONE_VM, CLONE_VM);
 	BLANK();
-	/* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
-	DEFINE(IA64_CPUINFO_ITM_DELTA_OFFSET, offsetof (struct cpuinfo_ia64, itm_delta));
-	DEFINE(IA64_CPUINFO_ITM_NEXT_OFFSET, offsetof (struct cpuinfo_ia64, itm_next));
 	DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET, offsetof (struct cpuinfo_ia64, nsec_per_cyc));
 	DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec));
@@ -202,5 +199,21 @@ void foo(void)
 	BLANK();
 	DEFINE(IA64_MCA_TLB_INFO_SIZE, sizeof (struct ia64_mca_tlb_info));
+	/* used by head.S */
+	DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET, offsetof (struct cpuinfo_ia64, nsec_per_cyc));
+	BLANK();
+	/* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
+	DEFINE(IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET, offsetof (struct time_interpolator, addr));
+	DEFINE(IA64_TIME_INTERPOLATOR_SOURCE_OFFSET, offsetof (struct time_interpolator, source));
+	DEFINE(IA64_TIME_INTERPOLATOR_SHIFT_OFFSET, offsetof (struct time_interpolator, shift));
+	DEFINE(IA64_TIME_INTERPOLATOR_NSEC_OFFSET, offsetof (struct time_interpolator, nsec_per_cyc));
+	DEFINE(IA64_TIME_INTERPOLATOR_OFFSET_OFFSET, offsetof (struct time_interpolator, offset));
+	DEFINE(IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET, offsetof (struct time_interpolator, last_cycle));
+	DEFINE(IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET, offsetof (struct time_interpolator, last_counter));
+	DEFINE(IA64_TIME_INTERPOLATOR_JITTER_OFFSET, offsetof (struct time_interpolator, jitter));
+	DEFINE(IA64_TIME_SOURCE_CPU, TIME_SOURCE_CPU);
+	DEFINE(IA64_TIME_SOURCE_MMIO64, TIME_SOURCE_MMIO64);
+	DEFINE(IA64_TIME_SOURCE_MMIO32, TIME_SOURCE_MMIO32);
+	DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec));
 }
arch/ia64/kernel/cyclone.c

@@ -17,62 +17,10 @@ void __init cyclone_setup(void)
 	use_cyclone = 1;
 }
 
-static u32* volatile cyclone_timer;	/* Cyclone MPMC0 register */
-static u32 last_update_cyclone;
-
-static unsigned long offset_base;
-
-static unsigned long get_offset_cyclone(void)
-{
-	u32 now;
-	unsigned long offset;
-
-	/* Read the cyclone timer */
-	now = readl(cyclone_timer);
-	/* .. relative to previous update*/
-	offset = now - last_update_cyclone;
-
-	/* convert cyclone ticks to nanoseconds */
-	offset = (offset*NSEC_PER_SEC)/CYCLONE_TIMER_FREQ;
-
-	/* our adjusted time in nanoseconds */
-	return offset_base + offset;
-}
-
-static void update_cyclone(long delta_nsec)
-{
-	u32 now;
-	unsigned long offset;
-
-	/* Read the cyclone timer */
-	now = readl(cyclone_timer);
-	/* .. relative to previous update*/
-	offset = now - last_update_cyclone;
-
-	/* convert cyclone ticks to nanoseconds */
-	offset = (offset*NSEC_PER_SEC)/CYCLONE_TIMER_FREQ;
-
-	offset += offset_base;
-
-	/* Be careful about signed/unsigned comparisons here: */
-	if (delta_nsec < 0 || (unsigned long) delta_nsec < offset)
-		offset_base = offset - delta_nsec;
-	else
-		offset_base = 0;
-
-	last_update_cyclone = now;
-}
-
-static void reset_cyclone(void)
-{
-	offset_base = 0;
-	last_update_cyclone = readl(cyclone_timer);
-}
-
 struct time_interpolator cyclone_interpolator = {
-	.get_offset =	get_offset_cyclone,
-	.update =	update_cyclone,
-	.reset =	reset_cyclone,
+	.source =	TIME_SOURCE_MMIO32,
+	.shift =	32,
 	.frequency =	CYCLONE_TIMER_FREQ,
 	.drift =	-100,
 };
@@ -83,6 +31,7 @@ int __init init_cyclone_clock(void)
 	u64 base;	/* saved cyclone base address */
 	u64 offset;	/* offset from pageaddr to cyclone_timer register */
 	int i;
+	u32* volatile cyclone_timer;	/* Cyclone MPMC0 register */
 
 	if (!use_cyclone)
 		return -ENODEV;
@@ -150,7 +99,7 @@ int __init init_cyclone_clock(void)
 		}
 	}
 	/* initialize last tick */
-	last_update_cyclone = readl(cyclone_timer);
+	cyclone_interpolator.addr = cyclone_timer;
 	register_time_interpolator(&cyclone_interpolator);
 
 	return 0;
...
arch/ia64/kernel/fsys.S

@@ -8,6 +8,8 @@
  * 18-Feb-03 louisk	Implement fsys_gettimeofday().
  * 28-Feb-03 davidm	Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
  *			probably broke it along the way... ;-)
+ * 13-Jul-04 clameter   Implement fsys_clock_gettime and revise fsys_gettimeofday to make
+ *			it capable of using memory based clocks without falling back to C code.
  */

 #include <asm/asmmacro.h>
@@ -144,195 +146,206 @@ ENTRY(fsys_set_tid_address)
 END(fsys_set_tid_address)

 /*
- * Note 1: This routine uses floating-point registers, but only with registers that
- *	   operate on integers. Because of that, we don't need to set ar.fpsr to the
- *	   kernel default value.
- *
- * Note 2: For now, we will assume that all CPUs run at the same clock-frequency.
- *	   If that wasn't the case, we would have to disable preemption (e.g.,
- *	   by disabling interrupts) between reading the ITC and reading
- *	   local_cpu_data->nsec_per_cyc.
- *
- * Note 3: On platforms where the ITC-drift bit is set in the SAL feature vector,
- *	   we ought to either skip the ITC-based interpolation or run an ntp-like
- *	   daemon to keep the ITCs from drifting too far apart.
+ * Ensure that the time interpolator structure is compatible with the asm code
  */
+#if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \
+	|| IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4
+#error fsys_gettimeofday incompatible with changes to struct time_interpolator
+#endif
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_DIVIDE_BY_1000 0x4000
+#define CLOCK_ADD_MONOTONIC 0x8000

 ENTRY(fsys_gettimeofday)
 	.prologue
 	.altrp b6
 	.body
-	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
-	addl r3=THIS_CPU(cpu_info),r0
-
-#ifdef CONFIG_SMP
-	movl r10=__per_cpu_offset
-	movl r2=sal_platform_features
-	;;
-	ld8 r2=[r2]
-	movl r19=xtime			// xtime is a timespec struct
-	ld8 r10=[r10]			// r10 <- __per_cpu_offset[0]
-	addl r21=THIS_CPU(cpu_info),r0
-	;;
-	add r10=r21, r10		// r10 <- &cpu_data(time_keeper_id)
-	tbit.nz p8,p0 = r2, IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT_BIT
-(p8)	br.spnt.many fsys_fallback_syscall
-#else
-	;;
-	mov r10=r3
-	movl r19=xtime			// xtime is a timespec struct
-#endif
-	ld4 r9=[r9]
-	movl r17=xtime_lock
-	;;
-
-	// r32, r33 should contain the 2 args of gettimeofday
-	adds r21=IA64_CPUINFO_ITM_NEXT_OFFSET, r10
-	mov r2=-1
-	tnat.nz p6,p7=r32		// guard against NaT args
-	;;
-	adds r10=IA64_CPUINFO_ITM_DELTA_OFFSET, r10
-(p7)	tnat.nz p6,p0=r33
-(p6)	br.cond.spnt.few .fail_einval
-	adds r8=IA64_CPUINFO_NSEC_PER_CYC_OFFSET, r3
-	movl r24=2361183241434822607	// for division hack (only for / 1000)
-	;;
-	ldf8 f7=[r10]			// f7 now contains itm_delta
-	setf.sig f11=r2
-	adds r10=8, r32
-	adds r20=IA64_TIMESPEC_TV_NSEC_OFFSET, r19	// r20 = &xtime->tv_nsec
-	movl r26=jiffies
-	setf.sig f9=r24			// f9 is used for division hack
-	movl r27=wall_jiffies
-	and r9=TIF_ALLWORK_MASK,r9
-	movl r25=last_nsec_offset
-	;;
-
-	/*
-	 * Verify that we have permission to write to struct timeval. Note:
-	 * Another thread might unmap the mapping before we actually get
-	 * to store the result. That's OK as long as the stores are also
-	 * protect by EX().
-	 */
-EX(.fail_efault, probe.w.fault r32, 3)	// this must come _after_ NaT-check
-EX(.fail_efault, probe.w.fault r10, 3)	// this must come _after_ NaT-check
-	nop 0
-
-	ldf8 f10=[r8]			// f10 <- local_cpu_data->nsec_per_cyc value
-	cmp.ne p8, p0=0, r9
-(p8)	br.spnt.many fsys_fallback_syscall
-	;;
-.retry:	// *** seq = read_seqbegin(&xtime_lock); ***
-	ld4.acq r23=[r17]		// since &xtime_lock == &xtime_lock->sequence
-	ld8 r14=[r25]			// r14 (old) = last_nsec_offset
-	ld8 r28=[r26]			// r28 = jiffies
-	ld8 r29=[r27]			// r29 = wall_jiffies
-	;;
-	ldf8 f8=[r21]			// f8 now contains itm_next
-	mov.m r31=ar.itc		// put time stamp into r31 (ITC) == now
-	sub r28=r29, r28, 1		// r28 now contains "-(lost + 1)"
-	;;
-	ld8 r2=[r19]			// r2 = sec = xtime.tv_sec
-	ld8 r29=[r20]			// r29 = nsec = xtime.tv_nsec
-	tbit.nz p9, p10=r23, 0		// p9 <- is_odd(r23), p10 <- is_even(r23)
-	setf.sig f6=r28			// f6 <- -(lost + 1)	(6 cyc)
-	;;
+	mov r31 = r32
+	tnat.nz p6,p0 = r33		// guard against NaT argument
+(p6)	br.cond.spnt.few .fail_einval
+	mov r30 = CLOCK_DIVIDE_BY_1000
+	;;
+.gettime:
+	// Register map
+	// Incoming r31 = pointer to address where to place result
+	//          r30 = flags determining how time is processed
+	// r2,r3 = temp r4-r7 preserved
+	// r8 = result nanoseconds
+	// r9 = result seconds
+	// r10 = temporary storage for clock difference
+	// r11 = preserved: saved ar.pfs
+	// r12 = preserved: memory stack
+	// r13 = preserved: thread pointer
+	// r14 = debug pointer / usable
+	// r15 = preserved: system call number
+	// r16 = preserved: current task pointer
+	// r17 = wall to monotonic use
+	// r18 = time_interpolator->offset
+	// r19 = address of wall_to_monotonic
+	// r20 = pointer to struct time_interpolator / pointer to time_interpolator->address
+	// r21 = shift factor
+	// r22 = address of time interpolator->last_counter
+	// r23 = address of time_interpolator->last_cycle
+	// r24 = adress of time_interpolator->offset
+	// r25 = last_cycle value
+	// r26 = last_counter value
+	// r27 = pointer to xtime
+	// r28 = sequence number at the beginning of critcal section
+	// r29 = address of seqlock
+	// r30 = time processing flags / memory address
+	// r31 = pointer to result
+	// Predicates
+	// p6,p7 short term use
+	// p8 = timesource ar.itc
+	// p9 = timesource mmio64
+	// p10 = timesource mmio32
+	// p11 = timesource not to be handled by asm code
+	// p12 = memory time source ( = p9 | p10)
+	// p13 = do cmpxchg with time_interpolator_last_cycle
+	// p14 = Divide by 1000
+	// p15 = Add monotonic
+	//
+	// Note that instructions are optimized for McKinley. McKinley can process two
+	// bundles simultaneously and therefore we continuously try to feed the CPU
+	// two bundles and then a stop.
+	tnat.nz p6,p0 = r31		// branch deferred since it does not fit into bundle structure
+	mov pr = r30,0xc000		// Set predicates according to function
+	add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
+	movl r20 = time_interpolator
+	;;
+	ld8 r20 = [r20]			// get pointer to time_interpolator structure
+	movl r29 = xtime_lock
+	ld4 r2 = [r2]			// process work pending flags
+	movl r27 = xtime
+	;;				// only one bundle here
+	ld8 r21 = [r20]			// first quad with control information
+	and r2 = TIF_ALLWORK_MASK,r2
+(p6)	br.cond.spnt.few .fail_einval	// deferred branch
+	;;
+	add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20
+	extr r3 = r21,32,32		// time_interpolator->nsec_per_cyc
+	extr r8 = r21,0,16		// time_interpolator->source
+	nop.i 123
+	cmp.ne p6, p0 = 0, r2		// Fallback if work is scheduled
+(p6)	br.cond.spnt.many fsys_fallback_syscall
+	;;
+	cmp.eq p8,p12 = 0,r8		// Check for cpu timer
+	cmp.eq p9,p0 = 1,r8		// MMIO64 ?
+	extr r2 = r21,24,8		// time_interpolator->jitter
+	cmp.eq p10,p0 = 2,r8		// MMIO32 ?
+	cmp.lt p11,p0 = 2,r8		// function?
+(p11)	br.cond.spnt.many fsys_fallback_syscall
+	;;
+	setf.sig f7 = r3		// Setup for scaling of counter
+(p15)	movl r19 = wall_to_monotonic
+(p12)	ld8 r30 = [r10]
+	cmp.ne p13,p0 = r2,r0		// need jitter compensation?
+	extr r21 = r21,16,8		// shift factor
+	;;
+.time_redo:
+	.pred.rel.mutex p8,p9,p10
+	ld4.acq r28 = [r29]		// xtime_lock.sequence. Must come first for locking purposes
+(p8)	mov r2 = ar.itc			// CPU_TIMER. 36 clocks latency!!!
+	add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20
+(p9)	ld8 r2 = [r30]			// readq(ti->address). Could also have latency issues..
+(p10)	ld4 r2 = [r30]			// readw(ti->address)
+(p13)	add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20
+	;;				// could be removed by moving the last add upward
+	ld8 r26 = [r22]			// time_interpolator->last_counter
+(p13)	ld8 r25 = [r23]			// time interpolator->last_cycle
+	add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20
+(p15)	ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET
+	ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET
+	nop.i 123
+	;;
+	ld8 r18 = [r24]			// time_interpolator->offset
+	ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET	// xtime.tv_nsec
+(p13)	sub r3 = r25,r2			// Diff needed before comparison (thanks davidm)
+	;;
+(p13)	cmp.gt.unc p6,p7 = r3,r0	// check if it is less than last. p6,p7 cleared
+	sub r10 = r2,r26		// current_counter - last_counter
+	;;
+(p6)	sub r10 = r25,r26		// time we got was less than last_cycle
+(p7)	mov ar.ccv = r25		// more than last_cycle. Prep for cmpxchg
+	;;
+	setf.sig f8 = r10
+	nop.i 123
+	;;
+(p7)	cmpxchg8.rel r3 = [r23],r2,ar.ccv
+EX(.fail_efault, probe.w.fault r31, 3)	// This takes 5 cycles and we have spare time
+	xmpy.l f8 = f8,f7		// nsec_per_cyc*(counter-last_counter)
+(p15)	add r9 = r9,r17			// Add wall to monotonic.secs to result secs
+	;;
+(p15)	ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET
+(p7)	cmp.ne p7,p0 = r25,r3		// if cmpxchg not successful redo
+	// simulate tbit.nz.or p7,p0 = r28,0
+	and r28 = ~1,r28		// Make sequence even to force retry if odd
+	getf.sig r2 = f8
 	mf
-	xma.l f8=f6, f7, f8	// f8 (last_tick) <- -(lost + 1)*itm_delta + itm_next	(5 cyc)
-	nop 0
-	setf.sig f12=r31	// f12 <- ITC	(6 cyc)
-	// *** if (unlikely(read_seqretry(&xtime_lock, seq))) continue; ***
-	ld4 r24=[r17]		// r24 = xtime_lock->sequence (re-read)
-	nop 0
-	;;
-	xma.l f8=f11, f8, f12	// f8 (elapsed_cycles) <- (-1*last_tick + now) = (now - last_tick)
-	nop 0
-	;;
-	getf.sig r18=f8		// r18 <- (now - last_tick)
-	xmpy.l f8=f8, f10	// f8 <- elapsed_cycles*nsec_per_cyc	(5 cyc)
-	add r3=r29, r14		// r3 = (nsec + old)
-	;;
-	cmp.lt p7, p8=r18, r0	// if now < last_tick, set p7 = 1, p8 = 0
-	getf.sig r18=f8		// r18 = elapsed_cycles*nsec_per_cyc	(6 cyc)
-	nop 0
-	;;
-(p10)	cmp.ne p9, p0=r23, r24	// if xtime_lock->sequence != seq, set p9
-	shr.u r18=r18, IA64_NSEC_PER_CYC_SHIFT	// r18 <- offset
-(p9)	br.spnt.many .retry
-	;;
-	mov ar.ccv=r14		// ar.ccv = old	(1 cyc)
-	cmp.leu p7, p8=r18, r14	// if (offset <= old), set p7 = 1, p8 = 0
-	;;
-(p8)	cmpxchg8.rel r24=[r25], r18, ar.ccv	// compare-and-exchange (atomic!)
-(p8)	add r3=r29, r18		// r3 = (nsec + offset)
-	;;
-	shr.u r3=r3, 3		// initiate dividing r3 by 1000
-	;;
-	setf.sig f8=r3		// (6 cyc)
-	mov r10=1000000		// r10 = 1000000
-	;;
-(p8)	cmp.ne.unc p9, p0=r24, r14
-	xmpy.hu f6=f8, f9	// (5 cyc)
-(p9)	br.spnt.many .retry
-	;;
-	getf.sig r3=f6		// (6 cyc)
-	;;
-	shr.u r3=r3, 4		// end of division, r3 is divided by 1000 (=usec)
-	;;
-1:	cmp.geu p7, p0=r3, r10	// while (usec >= 1000000)
-	;;
-(p7)	sub r3=r3, r10		// usec -= 1000000
-(p7)	adds r2=1, r2		// ++sec
-(p7)	br.spnt.many 1b
-	// finally: r2 = sec, r3 = usec
-EX(.fail_efault, st8 [r32]=r2)
-	adds r9=8, r32
-	mov r8=r0		// success
-	;;
-EX(.fail_efault, st8 [r9]=r3)	// store them in the timeval struct
-	mov r10=0
+	add r8 = r8,r18			// Add time interpolator offset
+	;;
+	ld4 r10 = [r29]			// xtime_lock.sequence
+(p15)	add r8 = r8, r17		// Add monotonic.nsecs to nsecs
+	shr.u r2 = r2,r21
+	;;				// overloaded 3 bundles!
+	// End critical section.
+	add r8 = r8,r2			// Add xtime.nsecs
+	cmp4.ne.or p7,p0 = r28,r10
+(p7)	br.cond.dpnt.few .time_redo	// sequence number changed ?
+	// Now r8=tv->tv_nsec and r9=tv->tv_sec
+	mov r10 = r0
+	movl r2 = 1000000000
+	add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
+(p14)	movl r3 = 2361183241434822607	// Prep for / 1000 hack
+	;;
+.time_normalize:
+	mov r21 = r8
+	cmp.ge p6,p0 = r8,r2
+(p14)	shr.u r20 = r8, 3		// We can repeat this if necessary just wasting some time
+	;;
+(p14)	setf.sig f8 = r20
+(p6)	sub r8 = r8,r2
+(p6)	add r9 = 1,r9			// two nops before the branch.
+(p14)	setf.sig f7 = r3		// Chances for repeats are 1 in 10000 for gettod
+(p6)	br.cond.dpnt.few .time_normalize
+	;;
+	// Divided by 8 though shift. Now divide by 125
+	// The compiler was able to do that with a multiply
+	// and a shift and we do the same
+EX(.fail_efault, probe.w.fault r23, 3)	// This also costs 5 cycles
+(p14)	xmpy.hu f8 = f8, f7		// xmpy has 5 cycles latency so use it...
+	;;
+	mov r8 = r0
+(p14)	getf.sig r2 = f8
+	;;
+(p14)	shr.u r21 = r2, 4
+	;;
+EX(.fail_efault, st8 [r31] = r9)
+EX(.fail_efault, st8 [r23] = r21)
 	FSYS_RETURN
-	/*
-	 * Note: We are NOT clearing the scratch registers here. Since the only things
-	 *	 in those registers are time-related variables and some addresses (which
-	 *	 can be obtained from System.map), none of this should be security-sensitive
-	 *	 and we should be fine.
-	 */
 .fail_einval:
-	mov r8=EINVAL			// r8 = EINVAL
-	mov r10=-1			// r10 = -1
+	mov r8 = EINVAL
+	mov r10 = -1
 	FSYS_RETURN
 .fail_efault:
-	mov r8=EFAULT			// r8 = EFAULT
-	mov r10=-1			// r10 = -1
+	mov r8 = EFAULT
+	mov r10 = -1
 	FSYS_RETURN
 END(fsys_gettimeofday)
+
+ENTRY(fsys_clock_gettime)
+	.prologue
+	.altrp b6
+	.body
+	cmp4.lt p6, p0 = CLOCK_MONOTONIC, r32
+	// Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
+(p6)	br.spnt.few fsys_fallback_syscall
+	mov r31 = r33
+	shl r30 = r32,15
+	br.many .gettime
+END(fsys_clock_gettime)

 /*
  * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
  */
@@ -838,7 +851,7 @@ fsyscall_table:
 	data8 0				// timer_getoverrun
 	data8 0				// timer_delete
 	data8 0				// clock_settime
-	data8 0				// clock_gettime
+	data8 fsys_clock_gettime	// clock_gettime
 	data8 0				// clock_getres		// 1255
 	data8 0				// clock_nanosleep
 	data8 0				// fstatfs64
...
arch/ia64/kernel/time.c

@@ -45,146 +45,7 @@ EXPORT_SYMBOL(last_cli_ip);
 #endif
 
-static void
-itc_reset (void)
-{
-}
-
-/*
- * Adjust for the fact that xtime has been advanced by delta_nsec (may be negative and/or
- * larger than NSEC_PER_SEC.
- */
-static void
-itc_update (long delta_nsec)
-{
-}
-
-/*
- * Return the number of nano-seconds that elapsed since the last
- * update to jiffy.  It is quite possible that the timer interrupt
- * will interrupt this and result in a race for any of jiffies,
- * wall_jiffies or itm_next.  Thus, the xtime_lock must be at least
- * read synchronised when calling this routine (see do_gettimeofday()
- * below for an example).
- */
-unsigned long
-itc_get_offset (void)
-{
-	unsigned long elapsed_cycles, lost = jiffies - wall_jiffies;
-	unsigned long now = ia64_get_itc(), last_tick;
-
-	last_tick = (cpu_data(TIME_KEEPER_ID)->itm_next
-		     - (lost + 1)*cpu_data(TIME_KEEPER_ID)->itm_delta);
-
-	elapsed_cycles = now - last_tick;
-	return (elapsed_cycles*local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT;
-}
-
-static struct time_interpolator itc_interpolator = {
-	.get_offset =	itc_get_offset,
-	.update =	itc_update,
-	.reset =	itc_reset
-};
-
-int
-do_settimeofday (struct timespec *tv)
-{
-	time_t wtm_sec, sec = tv->tv_sec;
-	long wtm_nsec, nsec = tv->tv_nsec;
-
-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
-		return -EINVAL;
-
-	write_seqlock_irq(&xtime_lock);
-	{
-		/*
-		 * This is revolting. We need to set "xtime" correctly. However, the value
-		 * in this location is the value at the most recent update of wall time.
-		 * Discover what correction gettimeofday would have done, and then undo
-		 * it!
-		 */
-		nsec -= time_interpolator_get_offset();
-
-		wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-		wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
-		set_normalized_timespec(&xtime, sec, nsec);
-		set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
-		time_adjust = 0;		/* stop active adjtime() */
-		time_status |= STA_UNSYNC;
-		time_maxerror = NTP_PHASE_LIMIT;
-		time_esterror = NTP_PHASE_LIMIT;
-		time_interpolator_reset();
-	}
-	write_sequnlock_irq(&xtime_lock);
-	clock_was_set();
-	return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
-
-void
-do_gettimeofday (struct timeval *tv)
-{
-	unsigned long seq, nsec, usec, sec, old, offset;
-
-	while (1) {
-		seq = read_seqbegin(&xtime_lock);
-		{
-			old = last_nsec_offset;
-			offset = time_interpolator_get_offset();
-			sec = xtime.tv_sec;
-			nsec = xtime.tv_nsec;
-		}
-		if (unlikely(read_seqretry(&xtime_lock, seq)))
-			continue;
-		/*
-		 * Ensure that for any pair of causally ordered gettimeofday() calls, time
-		 * never goes backwards (even when ITC on different CPUs are not perfectly
-		 * synchronized).  (A pair of concurrent calls to gettimeofday() is by
-		 * definition non-causal and hence it makes no sense to talk about
-		 * time-continuity for such calls.)
-		 *
-		 * Doing this in a lock-free and race-free manner is tricky. Here is why
-		 * it works (most of the time): read_seqretry() just succeeded, which
-		 * implies we calculated a consistent (valid) value for "offset". If the
-		 * cmpxchg() below succeeds, we further know that last_nsec_offset still
-		 * has the same value as at the beginning of the loop, so there was
-		 * presumably no timer-tick or other updates to last_nsec_offset in the
-		 * meantime. This isn't 100% true though: there _is_ a possibility of a
-		 * timer-tick occurring right right after read_seqretry() and then getting
-		 * zero or more other readers which will set last_nsec_offset to the same
-		 * value as the one we read at the beginning of the loop. If this
-		 * happens, we'll end up returning a slightly newer time than we ought to
-		 * (the jump forward is at most "offset" nano-seconds). There is no
-		 * danger of causing time to go backwards, though, so we are safe in that
-		 * sense. We could make the probability of this unlucky case occurring
-		 * arbitrarily small by encoding a version number in last_nsec_offset, but
-		 * even without versioning, the probability of this unlucky case should be
-		 * so small that we won't worry about it.
-		 */
-		if (offset <= old) {
-			offset = old;
-			break;
-		} else if (likely(cmpxchg(&last_nsec_offset, old, offset) == old))
-			break;
-
-		/* someone else beat us to updating last_nsec_offset; try again */
-	}
-
-	usec = (nsec + offset) / 1000;
-
-	while (unlikely(usec >= USEC_PER_SEC)) {
-		usec -= USEC_PER_SEC;
-		++sec;
-	}
-
-	tv->tv_sec = sec;
-	tv->tv_usec = usec;
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
+static struct time_interpolator itc_interpolator;
 
 static irqreturn_t
 timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
@@ -277,6 +138,18 @@ ia64_cpu_local_tick (void)
 	ia64_set_itm(local_cpu_data->itm_next);
 }
 
+static int nojitter;
+
+static int __init nojitter_setup(char *str)
+{
+	nojitter = 1;
+	printk("Jitter checking for ITC timers disabled\n");
+	return 1;
+}
+
+__setup("nojitter", nojitter_setup);
+
 void __devinit
 ia64_init_itm (void)
 {
@@ -339,7 +212,23 @@ ia64_init_itm (void)
 	if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
 		itc_interpolator.frequency = local_cpu_data->itc_freq;
+		itc_interpolator.shift = 16;
 		itc_interpolator.drift = itc_drift;
+		itc_interpolator.source = TIME_SOURCE_CPU;
+#ifdef CONFIG_SMP
+		/* On IA64 in an SMP configuration ITCs are never accurately synchronized.
+		 * Jitter compensation requires a cmpxchg which may limit
+		 * the scalability of the syscalls for retrieving time.
+		 * The ITC synchronization is usually successful to within a few
+		 * ITC ticks but this is not a sure thing. If you need to improve
+		 * timer performance in SMP situations then boot the kernel with the
+		 * "nojitter" option. However, doing so may result in time fluctuating (maybe
+		 * even going backward) if the ITC offsets between the individual CPUs
+		 * are too large.
+		 */
+		if (!nojitter) itc_interpolator.jitter = 1;
+#endif
+		itc_interpolator.addr = NULL;
 		register_time_interpolator(&itc_interpolator);
 	}
...
arch/ia64/sn/kernel/sn2/timer.c

@@ -20,57 +20,16 @@
 extern unsigned long sn_rtc_cycles_per_second;
 
-static volatile unsigned long last_wall_rtc;
-static unsigned long rtc_offset;	/* updated only when xtime write-lock is held! */
-static long rtc_nsecs_per_cycle;
-static long rtc_per_timer_tick;
-
-static unsigned long
-getoffset(void)
-{
-	return rtc_offset + (GET_RTC_COUNTER() - last_wall_rtc)*rtc_nsecs_per_cycle;
-}
-
-static void
-update(long delta_nsec)
-{
-	unsigned long rtc_counter = GET_RTC_COUNTER();
-	unsigned long offset = rtc_offset + (rtc_counter - last_wall_rtc)*rtc_nsecs_per_cycle;
-
-	/* Be careful about signed/unsigned comparisons here: */
-	if (delta_nsec < 0 || (unsigned long) delta_nsec < offset)
-		rtc_offset = offset - delta_nsec;
-	else
-		rtc_offset = 0;
-	last_wall_rtc = rtc_counter;
-}
-
-static void
-reset(void)
-{
-	rtc_offset = 0;
-	last_wall_rtc = GET_RTC_COUNTER();
-}
-
-static struct time_interpolator sn2_interpolator = {
-	.get_offset =	getoffset,
-	.update =	update,
-	.reset =	reset
-};
+static struct time_interpolator sn2_interpolator;
 
 void __init
 sn_timer_init(void)
 {
 	sn2_interpolator.frequency = sn_rtc_cycles_per_second;
 	sn2_interpolator.drift = -1;	/* unknown */
+	sn2_interpolator.shift = 10;	/* RTC is 54 bits maximum shift is 10 */
+	sn2_interpolator.addr = RTC_COUNTER_ADDR;
+	sn2_interpolator.source = TIME_SOURCE_MMIO64;
 	register_time_interpolator(&sn2_interpolator);
-
-	rtc_per_timer_tick = sn_rtc_cycles_per_second / HZ;
-	rtc_nsecs_per_cycle = 1000000000 / sn_rtc_cycles_per_second;
-
-	last_wall_rtc = GET_RTC_COUNTER();
 }
drivers/char/hpet.c

@@ -662,40 +662,10 @@ int hpet_control(struct hpet_task *tp, unsigned int cmd, unsigned long arg)
 
 #ifdef CONFIG_TIME_INTERPOLATION
 
-static unsigned long hpet_offset, last_wall_hpet;
-static long hpet_nsecs_per_cycle, hpet_cycles_per_sec;
-
-static unsigned long hpet_getoffset(void)
-{
-	return hpet_offset + (read_counter(&hpets->hp_hpet->hpet_mc) -
-			      last_wall_hpet) * hpet_nsecs_per_cycle;
-}
-
-static void hpet_update(long delta)
-{
-	unsigned long mc;
-	unsigned long offset;
-
-	mc = read_counter(&hpets->hp_hpet->hpet_mc);
-	offset = hpet_offset + (mc - last_wall_hpet) * hpet_nsecs_per_cycle;
-
-	if (delta < 0 || (unsigned long)delta < offset)
-		hpet_offset = offset - delta;
-	else
-		hpet_offset = 0;
-	last_wall_hpet = mc;
-}
-
-static void hpet_reset(void)
-{
-	hpet_offset = 0;
-	last_wall_hpet = read_counter(&hpets->hp_hpet->hpet_mc);
-}
-
 static struct time_interpolator hpet_interpolator = {
-	.get_offset = hpet_getoffset,
-	.update = hpet_update,
-	.reset = hpet_reset
+	.source = TIME_SOURCE_MMIO64,
+	.shift = 10,
+	.addr = MC
 };
 
 #endif
...
include/linux/timex.h

@@ -47,14 +47,18 @@
  * kernel PLL updated to 1994-12-13 specs (rfc-1589)
  * 1997-08-30    Ulrich Windl
  *	Added new constant NTP_PHASE_LIMIT
+ * 2004-08-12    Christoph Lameter
+ *	Reworked time interpolation logic
  */
 #ifndef _LINUX_TIMEX_H
 #define _LINUX_TIMEX_H
 
 #include <linux/config.h>
 #include <linux/compiler.h>
+#include <linux/jiffies.h>
 
 #include <asm/param.h>
+#include <asm/io.h>
 
 /*
  * The following defines establish the engineering parameters of the PLL
@@ -320,101 +324,60 @@ extern long pps_stbcnt;		/* stability limit exceeded */
 
 #ifdef CONFIG_TIME_INTERPOLATION
 
-struct time_interpolator {
-	/* cache-hot stuff first: */
-	unsigned long (*get_offset) (void);
-	void (*update) (long);
-	void (*reset) (void);
-
-	/* cache-cold stuff follows here: */
-	struct time_interpolator *next;
+#define TIME_SOURCE_CPU 0
+#define TIME_SOURCE_MMIO64 1
+#define TIME_SOURCE_MMIO32 2
+#define TIME_SOURCE_FUNCTION 3
+
+/* For proper operations time_interpolator clocks must run slightly slower
+ * than the standard clock since the interpolator may only correct by having
+ * time jump forward during a tick. A slower clock is usually a side effect
+ * of the integer divide of the nanoseconds in a second by the frequency.
+ * The accuracy of the division can be increased by specifying a shift.
+ * However, this may cause the clock not to be slow enough.
+ * The interpolator will self-tune the clock by slowing down if no
+ * resets occur or speeding up if the time jumps per analysis cycle
+ * become too high.
+ *
+ * Setting jitter compensates for a fluctuating timesource by comparing
+ * to the last value read from the timesource to insure that an earlier value
+ * is not returned by a later call. The price to pay
+ * for the compensation is that the timer routines are not as scalable anymore.
+ */
+
+#define INTERPOLATOR_ADJUST 65536
+#define INTERPOLATOR_MAX_SKIP 10*INTERPOLATOR_ADJUST
+
+struct time_interpolator {
+	unsigned short source;		/* time source flags */
+	unsigned char shift;		/* increases accuracy of multiply by shifting. */
+					/* Note that bits may be lost if shift is set too high */
+	unsigned char jitter;		/* if set compensate for fluctuations */
+	unsigned nsec_per_cyc;		/* set by register_time_interpolator() */
+	void *addr;			/* address of counter or function */
+	unsigned long offset;		/* nsec offset at last update of interpolator */
+	unsigned long last_counter;	/* counter value in units of the counter at last update */
+	unsigned long last_cycle;	/* Last timer value if TIME_SOURCE_JITTER is set */
 	unsigned long frequency;	/* frequency in counts/second */
 	long drift;			/* drift in parts-per-million (or -1) */
+	unsigned long skips;		/* skips forward */
+	unsigned long ns_skipped;	/* nanoseconds skipped */
+	struct time_interpolator *next;
 };
 
-extern volatile unsigned long last_nsec_offset;
-#ifndef __HAVE_ARCH_CMPXCHG
-extern spin_lock_t last_nsec_offset_lock;
-#endif
-extern struct time_interpolator *time_interpolator;
-
 extern void register_time_interpolator(struct time_interpolator *);
 extern void unregister_time_interpolator(struct time_interpolator *);
+extern void time_interpolator_reset(void);
+extern unsigned long time_interpolator_resolution(void);
+extern unsigned long time_interpolator_get_offset(void);
 
-/* Called with xtime WRITE-lock acquired. */
-static inline void
-time_interpolator_update(long delta_nsec)
-{
-	struct time_interpolator *ti = time_interpolator;
-
-	if (last_nsec_offset > 0) {
-#ifdef __HAVE_ARCH_CMPXCHG
-		unsigned long new, old;
-
-		do {
-			old = last_nsec_offset;
-			if (old > delta_nsec)
-				new = old - delta_nsec;
-			else
-				new = 0;
-		} while (cmpxchg(&last_nsec_offset, old, new) != old);
-#else
-		/*
-		 * This really hurts, because it serializes gettimeofday(), but without an
-		 * atomic single-word compare-and-exchange, there isn't all that much else
-		 * we can do.
-		 */
-		spin_lock(&last_nsec_offset_lock);
-		{
-			last_nsec_offset -= min(last_nsec_offset, delta_nsec);
-		}
-		spin_unlock(&last_nsec_offset_lock);
-#endif
-	}
-
-	if (ti)
-		(*ti->update)(delta_nsec);
-}
-
-/* Called with xtime WRITE-lock acquired. */
-static inline void
-time_interpolator_reset(void)
-{
-	struct time_interpolator *ti = time_interpolator;
-
-	last_nsec_offset = 0;
-	if (ti)
-		(*ti->reset)();
-}
-
-/* Called with xtime READ-lock acquired. */
-static inline unsigned long
-time_interpolator_get_offset(void)
-{
-	struct time_interpolator *ti = time_interpolator;
-
-	if (ti)
-		return (*ti->get_offset)();
-
-	return last_nsec_offset;
-}
-
 #else /* !CONFIG_TIME_INTERPOLATION */
 
-static inline void
-time_interpolator_update(long delta_nsec)
-{
-}
-
 static inline void
 time_interpolator_reset(void)
 {
 }
 
-static inline unsigned long
-time_interpolator_get_offset(void)
-{
-	return 0;
-}
-
 #endif /* !CONFIG_TIME_INTERPOLATION */
 
 #endif /* KERNEL */
...
kernel/posix-timers.c

@@ -219,6 +219,11 @@ static __init int init_posix_timers(void)
 		.clock_set = do_posix_clock_monotonic_settime
 	};
 
+#ifdef CONFIG_TIME_INTERPOLATION
+	/* Clocks are more accurate with time interpolators */
+	clock_realtime.res = clock_monotonic.res = time_interpolator_resolution();
+#endif
+
 	register_posix_clock(CLOCK_REALTIME, &clock_realtime);
 	register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
...
kernel/time.c

@@ -126,7 +126,7 @@ inline static void warp_clock(void)
 	write_seqlock_irq(&xtime_lock);
 	wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60;
 	xtime.tv_sec += sys_tz.tz_minuteswest * 60;
-	time_interpolator_update(sys_tz.tz_minuteswest * 60 * NSEC_PER_SEC);
+	time_interpolator_reset();
 	write_sequnlock_irq(&xtime_lock);
 	clock_was_set();
 }
@@ -442,6 +442,68 @@ void getnstimeofday (struct timespec *tv)
 	tv->tv_sec = sec;
 	tv->tv_nsec = nsec;
 }
+
+int do_settimeofday (struct timespec *tv)
+{
+	time_t wtm_sec, sec = tv->tv_sec;
+	long wtm_nsec, nsec = tv->tv_nsec;
+
+	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
+		return -EINVAL;
+
+	write_seqlock_irq(&xtime_lock);
+	{
+		/*
+		 * This is revolting. We need to set "xtime" correctly. However, the value
+		 * in this location is the value at the most recent update of wall time.
+		 * Discover what correction gettimeofday would have done, and then undo
+		 * it!
+		 */
+		nsec -= time_interpolator_get_offset();
+
+		wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
+		wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
+
+		set_normalized_timespec(&xtime, sec, nsec);
+		set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+
+		time_adjust = 0;		/* stop active adjtime() */
+		time_status |= STA_UNSYNC;
+		time_maxerror = NTP_PHASE_LIMIT;
+		time_esterror = NTP_PHASE_LIMIT;
+		time_interpolator_reset();
+	}
+	write_sequnlock_irq(&xtime_lock);
+	clock_was_set();
+	return 0;
+}
+
+EXPORT_SYMBOL(do_settimeofday);
+
+void do_gettimeofday (struct timeval *tv)
+{
+	unsigned long seq, nsec, usec, sec, offset;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		offset = time_interpolator_get_offset();
+		sec = xtime.tv_sec;
+		nsec = xtime.tv_nsec;
+	} while (unlikely(read_seqretry(&xtime_lock, seq)));
+
+	usec = (nsec + offset) / 1000;
+
+	while (unlikely(usec >= USEC_PER_SEC)) {
+		usec -= USEC_PER_SEC;
+		++sec;
+	}
+
+	tv->tv_sec = sec;
+	tv->tv_usec = usec;
+}
+
+EXPORT_SYMBOL(do_gettimeofday);
+
 #else
 /*
  * Simulate gettimeofday using do_gettimeofday which only allows a timeval
...
kernel/timer.c

@@ -37,6 +37,12 @@
 #include <asm/div64.h>
 #include <asm/timex.h>
 
+#ifdef CONFIG_TIME_INTERPOLATION
+static void time_interpolator_update(long delta_nsec);
+#else
+#define time_interpolator_update(x)
+#endif
+
 /*
  * per-CPU timer vector definitions:
  */
@@ -621,6 +627,9 @@ static void second_overflow(void)
 		if (xtime.tv_sec % 86400 == 0) {
 			xtime.tv_sec--;
 			wall_to_monotonic.tv_sec++;
+			/* The timer interpolator will make time change gradually instead
+			 * of an immediate jump by one second.
+			 */
 			time_interpolator_update(-NSEC_PER_SEC);
 			time_state = TIME_OOP;
 			clock_was_set();
@@ -632,6 +641,7 @@ static void second_overflow(void)
 		if ((xtime.tv_sec + 1) % 86400 == 0) {
 			xtime.tv_sec++;
 			wall_to_monotonic.tv_sec--;
+			/* Use of time interpolator for a gradual change of time */
 			time_interpolator_update(NSEC_PER_SEC);
 			time_state = TIME_WAIT;
 			clock_was_set();
@@ -1427,15 +1437,109 @@ void __init init_timers(void)
 }
 
 #ifdef CONFIG_TIME_INTERPOLATION
-volatile unsigned long last_nsec_offset;
-#ifndef __HAVE_ARCH_CMPXCHG
-spinlock_t last_nsec_offset_lock = SPIN_LOCK_UNLOCKED;
-#endif
 
 struct time_interpolator *time_interpolator;
 static struct time_interpolator *time_interpolator_list;
 static spinlock_t time_interpolator_lock = SPIN_LOCK_UNLOCKED;
 
+static inline unsigned long time_interpolator_get_cycles(unsigned int src)
+{
+	unsigned long (*x)(void);
+
+	switch (src)
+	{
+		case TIME_SOURCE_FUNCTION:
+			x = time_interpolator->addr;
+			return x();
+
+		case TIME_SOURCE_MMIO64:
+			return readq(time_interpolator->addr);
+
+		case TIME_SOURCE_MMIO32:
+			return readl(time_interpolator->addr);
+
+		default: return get_cycles();
+	}
+}
+
+static inline unsigned long time_interpolator_get_counter(void)
+{
+	unsigned int src = time_interpolator->source;
+
+	if (time_interpolator->jitter)
+	{
+		unsigned long lcycle;
+		unsigned long now;
+
+		do {
+			lcycle = time_interpolator->last_cycle;
+			now = time_interpolator_get_cycles(src);
+			if (lcycle && time_after(lcycle, now))
+				return lcycle;
+			/* Keep track of the last timer value returned. The use of cmpxchg here
+			 * will cause contention in an SMP environment.
+			 */
+		} while (unlikely(cmpxchg(&time_interpolator->last_cycle, lcycle, now) != lcycle));
+		return now;
+	}
+	else
+		return time_interpolator_get_cycles(src);
+}
+
+void time_interpolator_reset(void)
+{
+	time_interpolator->offset = 0;
+	time_interpolator->last_counter = time_interpolator_get_counter();
+}
+
+unsigned long time_interpolator_resolution(void)
+{
+	return NSEC_PER_SEC / time_interpolator->frequency;
+}
+
+#define GET_TI_NSECS(count,i) ((((count) - i->last_counter) * i->nsec_per_cyc) >> i->shift)
+
+unsigned long time_interpolator_get_offset(void)
+{
+	return time_interpolator->offset +
+		GET_TI_NSECS(time_interpolator_get_counter(), time_interpolator);
+}
+
+static void time_interpolator_update(long delta_nsec)
+{
+	unsigned long counter = time_interpolator_get_counter();
+	unsigned long offset = time_interpolator->offset + GET_TI_NSECS(counter, time_interpolator);
+
+	/* The interpolator compensates for late ticks by accumulating
+	 * the late time in time_interpolator->offset. A tick earlier than
+	 * expected will lead to a reset of the offset and a corresponding
+	 * jump of the clock forward. Again this only works if the
+	 * interpolator clock is running slightly slower than the regular clock
+	 * and the tuning logic insures that.
+	 */
+	if (delta_nsec < 0 || (unsigned long) delta_nsec < offset)
+		time_interpolator->offset = offset - delta_nsec;
+	else {
+		time_interpolator->skips++;
+		time_interpolator->ns_skipped += delta_nsec - offset;
+		time_interpolator->offset = 0;
+	}
+	time_interpolator->last_counter = counter;
+
+	/* Tuning logic for time interpolator invoked every minute or so.
+	 * Decrease interpolator clock speed if no skips occurred and an offset is carried.
+	 * Increase interpolator clock speed if we skip too much time.
+	 */
+	if (jiffies % INTERPOLATOR_ADJUST == 0)
+	{
+		if (time_interpolator->skips == 0 && time_interpolator->offset > TICK_NSEC)
+			time_interpolator->nsec_per_cyc--;
+		if (time_interpolator->ns_skipped > INTERPOLATOR_MAX_SKIP && time_interpolator->offset == 0)
+			time_interpolator->nsec_per_cyc++;
+		time_interpolator->skips = 0;
+		time_interpolator->ns_skipped = 0;
+	}
+}
+
 static inline int
 is_better_time_interpolator(struct time_interpolator *new)
 {
@@ -1450,10 +1554,13 @@ register_time_interpolator(struct time_interpolator *ti)
 {
 	unsigned long flags;
 
+	ti->nsec_per_cyc = (NSEC_PER_SEC << ti->shift) / ti->frequency;
 	spin_lock(&time_interpolator_lock);
 	write_seqlock_irqsave(&xtime_lock, flags);
-	if (is_better_time_interpolator(ti))
+	if (is_better_time_interpolator(ti)) {
 		time_interpolator = ti;
+		time_interpolator_reset();
+	}
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 
 	ti->next = time_interpolator_list;
@@ -1485,6 +1592,7 @@ unregister_time_interpolator(struct time_interpolator *ti)
 		for (curr = time_interpolator_list; curr; curr = curr->next)
 			if (is_better_time_interpolator(curr))
 				time_interpolator = curr;
+		time_interpolator_reset();
 	}
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 	spin_unlock(&time_interpolator_lock);
...