Commit 7fe672b1 authored by George Anzinger, committed by Linus Torvalds

[PATCH] Bugfix for CLOCK_REALTIME absolute timer

As required by the standard, this patch adds to POSIX ABSOLUTE timers the
functionality of adjusting the timer when the clock is set, so that it still
expires at the specified time (provided that time has not already passed, in
which case the timer expires immediately).

The standard is, IMNSOHO, a bit vague on just how repeating timers are to
be handled, so I made some choices:

1) If an absolute timer is to expire every N intervals, we assume that
   the expiries should still happen at those specified times after the
   clock is set, i.e. we adjust the repeat timer as well as the initial
   timer.  (The other option would be to treat the repeating timers as
   relative and not adjust them.)

2) If a clock set moves the clock prior to the initial expiry time
   AND that time has already passed and been signaled, the current repeat
   timer is adjusted, i.e. we DO NOT go back to the initial time and
   repeat it.  (The other option is to treat this case as a new request
   with the initial timer parameters, which by this time we have lost.)

3) If time is advanced such that several expiries appear to have been
   missed, the overrun count will reflect the misses.  (The other option
   is to not reflect this in the overrun.)  At the same time, nothing is
   done to tell the user that we are repeating expiries when the clock is
   retarded.  (A user-space sketch of this behavior follows below.)
Signed-off-by: George Anzinger <george@mvista.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 84db6603
#ifndef _linux_POSIX_TIMERS_H
#define _linux_POSIX_TIMERS_H
#include <linux/spinlock.h>
#include <linux/list.h>
struct k_clock_abs {
struct list_head list;
spinlock_t lock;
};
struct k_clock {
int res; /* in nanoseconds */
struct k_clock_abs *abs_struct;
int (*clock_set) (struct timespec * tp);
int (*clock_get) (struct timespec * tp);
int (*nsleep) (int flags,
@@ -23,8 +31,14 @@ struct now_struct {
#define posix_time_before(timer, now) \
time_before((timer)->expires, (now)->jiffies)
-#define posix_bump_timer(timr) do { \
-        (timr)->it_timer.expires += (timr)->it_incr; \
-        (timr)->it_overrun++; \
-        }while (0)
+#define posix_bump_timer(timr, now) \
+        do { \
+                long delta, orun; \
+                delta = now.jiffies - (timr)->it_timer.expires; \
+                if (delta >= 0) { \
+                        orun = 1 + (delta / (timr)->it_incr); \
+                        (timr)->it_timer.expires += orun * (timr)->it_incr; \
+                        (timr)->it_overrun += orun; \
+                } \
+        }while (0)
#endif
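A worked example of the new macro's arithmetic (numbers invented for
illustration):

/* Suppose it_incr = 10 jiffies, it_timer.expires = 1000 and
 * now.jiffies = 1035 when the bump runs:
 *
 *      delta = 1035 - 1000 = 35        the timer is 35 jiffies late
 *      orun  = 1 + 35 / 10 = 4         four periods were missed
 *      expires = 1000 + 4 * 10 = 1040  first expiry in the future
 *      it_overrun += 4
 *
 * The old macro advanced one period per invocation, so callers had to
 * loop; see the do_timer_gettime hunk below, where such a loop is
 * removed.
 */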
@@ -343,6 +343,8 @@ struct k_itimer {
struct task_struct *it_process; /* process to send signal to */
struct timer_list it_timer;
struct sigqueue *sigq; /* signal queue entry. */
struct list_head abs_timer_entry; /* clock abs_timer_list */
struct timespec wall_to_prev; /* wall_to_monotonic used when set */
};
......
@@ -7,6 +7,9 @@
*
* Copyright (C) 2002 2003 by MontaVista Software.
*
* 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
* Copyright (C) 2004 Boris Hu
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or (at
@@ -41,6 +44,7 @@
#include <linux/idr.h>
#include <linux/posix-timers.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#ifndef div_long_long_rem
#include <asm/div64.h>
@@ -169,6 +173,12 @@ static spinlock_t idr_lock = SPIN_LOCK_UNLOCKED;
*/
static struct k_clock posix_clocks[MAX_CLOCKS];
/*
* We only have one real clock that can be set so we need only one abs list,
* even if we should want to have several clocks with differing resolutions.
*/
static struct k_clock_abs abs_list = {.list = LIST_HEAD_INIT(abs_list.list),
.lock = SPIN_LOCK_UNLOCKED};
#define if_clock_do(clock_fun,alt_fun,parms) \
(!clock_fun) ? alt_fun parms : clock_fun parms
@@ -200,8 +210,11 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
*/
static __init int init_posix_timers(void)
{
-        struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES };
+        struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES,
+                                         .abs_struct = &abs_list
+        };
struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES,
.abs_struct = NULL,
.clock_get = do_posix_clock_monotonic_gettime,
.clock_set = do_posix_clock_monotonic_settime
};
@@ -212,7 +225,6 @@ static __init int init_posix_timers(void)
posix_timers_cache = kmem_cache_create("posix_timers_cache",
sizeof (struct k_itimer), 0, 0, NULL, NULL);
idr_init(&posix_timers_id);
return 0;
}
@@ -239,19 +251,92 @@ static void tstojiffie(struct timespec *tp, int res, u64 *jiff)
(NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
}
/*
* This function adjusts the timer as needed as a result of the clock
* being set. It should only be called for absolute timers, and then
* under the abs_list lock. It computes the time difference and sets
* the new jiffies value in the timer.  It also updates the timer's
* reference wall_to_monotonic value.  It is complicated by the fact
* that tstojiffie() only handles positive times and it needs to work
* with both positive and negative times. Also, for negative offsets,
* we need to defeat the res round up.
*
* Return is true if there is a new time, else false.
*/
static long add_clockset_delta(struct k_itimer *timr,
struct timespec *new_wall_to)
{
struct timespec delta;
int sign = 0;
u64 exp;
set_normalized_timespec(&delta,
new_wall_to->tv_sec -
timr->wall_to_prev.tv_sec,
new_wall_to->tv_nsec -
timr->wall_to_prev.tv_nsec);
if (likely(!(delta.tv_sec | delta.tv_nsec)))
return 0;
if (delta.tv_sec < 0) {
set_normalized_timespec(&delta,
-delta.tv_sec,
1 - delta.tv_nsec -
posix_clocks[timr->it_clock].res);
sign++;
}
tstojiffie(&delta, posix_clocks[timr->it_clock].res, &exp);
timr->wall_to_prev = *new_wall_to;
timr->it_timer.expires += (sign ? -exp : exp);
return 1;
}
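The negative-offset handling above is subtle; here is a stand-alone sketch
with invented numbers (the normalize() helper mirrors the kernel's
set_normalized_timespec()):

#include <time.h>

#define NSEC_PER_SEC 1000000000L

/* Fold nsec into [0, NSEC_PER_SEC), carrying into sec. */
static void normalize(struct timespec *ts, long sec, long nsec)
{
        while (nsec >= NSEC_PER_SEC) { nsec -= NSEC_PER_SEC; sec++; }
        while (nsec < 0)             { nsec += NSEC_PER_SEC; sec--; }
        ts->tv_sec = sec;
        ts->tv_nsec = nsec;
}

/* Example: the clock was stepped forward, so wall_to_monotonic shrank
 * and the raw delta is -1.25s.  normalize(ts, -1, -250000000) yields
 * { tv_sec = -2, tv_nsec = 750000000 }.  Because tv_sec < 0, the code
 * above re-normalizes with negated seconds and "1 - tv_nsec - res",
 * which turns tstojiffie()'s round-up toward the next jiffie into a
 * round-down for the negated offset; the result is then subtracted
 * (sign != 0) from it_timer.expires.
 */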
static void remove_from_abslist(struct k_itimer *timr)
{
if (!list_empty(&timr->abs_timer_entry)) {
spin_lock(&abs_list.lock);
list_del_init(&timr->abs_timer_entry);
spin_unlock(&abs_list.lock);
}
}
static void schedule_next_timer(struct k_itimer *timr)
{
struct timespec new_wall_to;
struct now_struct now;
unsigned long seq;
-        /* Set up the timer for the next interval (if there is one) */
+        /*
+         * Set up the timer for the next interval (if there is one).
+         * Note: this code uses the abs_timer_lock to protect
+         * wall_to_prev and must hold it until exp is set, not exactly
+         * obvious...
+         * This function is used for CLOCK_REALTIME* and
+         * CLOCK_MONOTONIC* timers.  If we ever want to handle other
+         * CLOCKs, the calling code (do_schedule_next_timer) would need
+         * to pull the "clock" info from the timer and dispatch the
+         * "other" CLOCKs "next timer" code (which, I suppose should
+         * also be added to the k_clock structure).
+         */
         if (!timr->it_incr)
                 return;
-        posix_get_now(&now);
         do {
-                posix_bump_timer(timr);
-        }while (posix_time_before(&timr->it_timer, &now));
+                seq = read_seqbegin(&xtime_lock);
+                new_wall_to = wall_to_monotonic;
+                posix_get_now(&now);
+        } while (read_seqretry(&xtime_lock, seq));
if (!list_empty(&timr->abs_timer_entry)) {
spin_lock(&abs_list.lock);
add_clockset_delta(timr, &new_wall_to);
posix_bump_timer(timr, now);
spin_unlock(&abs_list.lock);
} else {
posix_bump_timer(timr, now);
}
timr->it_overrun_last = timr->it_overrun;
timr->it_overrun = -1;
++timr->it_requeue_pending;
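For what it's worth, the reason wall_to_monotonic can serve as the clock-set
reference here (my reading of the patch, not text from it):

/* Timekeeping identity: monotonic time = wall time + wall_to_monotonic.
 * A clock set steps xtime (wall time) and compensates wall_to_monotonic
 * by the opposite amount, so for a timer armed when the offset was
 * wall_to_prev the accumulated clock steps since arming are
 *
 *      delta = new_wall_to - wall_to_prev
 *
 * A forward step of S seconds gives delta = -S; adding it to
 * it_timer.expires (which counts in monotonic jiffies) moves the expiry
 * S seconds earlier, i.e. keeps it at the same CLOCK_REALTIME instant.
 */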
@@ -312,7 +397,15 @@ static void timer_notify_task(struct k_itimer *timr)
memset(&timr->sigq->info, 0, sizeof(siginfo_t));
-        /* Send signal to the process that owns this timer. */
+        /*
+         * Send signal to the process that owns this timer.
+         * This code assumes that all the possible abs_lists share the
+         * same lock (there is only one list at this time).  If this is
+         * not the case, the CLOCK info would need to be used to find
+         * the proper abs list lock.
+         */
timr->sigq->info.si_signo = timr->it_sigev_signo;
timr->sigq->info.si_errno = 0;
timr->sigq->info.si_code = SI_TIMER;
@@ -320,6 +413,9 @@ static void timer_notify_task(struct k_itimer *timr)
timr->sigq->info.si_value = timr->it_sigev_value;
if (timr->it_incr)
timr->sigq->info.si_sys_private = ++timr->it_requeue_pending;
else {
remove_from_abslist(timr);
}
if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
if (unlikely(timr->it_process->flags & PF_EXITING)) {
@@ -350,16 +446,51 @@ static void timer_notify_task(struct k_itimer *timr)
* This function gets called when a POSIX.1b interval timer expires. It
* is used as a callback from the kernel internal timer. The
* run_timer_list code ALWAYS calls with interrupts on.
* This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers.
*/
static void posix_timer_fn(unsigned long __data)
{
struct k_itimer *timr = (struct k_itimer *) __data;
unsigned long flags;
unsigned long seq;
struct timespec delta, new_wall_to;
u64 exp = 0;
int do_notify = 1;
spin_lock_irqsave(&timr->it_lock, flags);
set_timer_inactive(timr);
-        timer_notify_task(timr);
-        unlock_timer(timr, flags);
+        if (!list_empty(&timr->abs_timer_entry)) {
+                spin_lock(&abs_list.lock);
+                do {
+                        seq = read_seqbegin(&xtime_lock);
+                        new_wall_to = wall_to_monotonic;
+                } while (read_seqretry(&xtime_lock, seq));
+                set_normalized_timespec(&delta,
+                                        new_wall_to.tv_sec -
+                                        timr->wall_to_prev.tv_sec,
+                                        new_wall_to.tv_nsec -
+                                        timr->wall_to_prev.tv_nsec);
+                if (likely((delta.tv_sec | delta.tv_nsec) == 0)) {
+                        /* do nothing, timer is on time */
+                } else if (delta.tv_sec < 0) {
+                        /* do nothing, timer is already late */
+                } else {
+                        /* timer is early due to a clock set */
+                        tstojiffie(&delta,
+                                   posix_clocks[timr->it_clock].res,
+                                   &exp);
+                        timr->wall_to_prev = new_wall_to;
+                        timr->it_timer.expires += exp;
+                        add_timer(&timr->it_timer);
+                        do_notify = 0;
+                }
+                spin_unlock(&abs_list.lock);
+        }
+        if (do_notify)
+                timer_notify_task(timr);
+        unlock_timer(timr, flags); /* hold thru abs lock to keep irq off */
}
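The three branches above, with invented numbers:

/* delta == 0: no clock set since arming (or clock_was_set() already
 *      adjusted this timer): deliver the signal now.
 * delta < 0:  the clock was stepped forward, so the wall-clock deadline
 *      has already passed: deliver immediately as well.
 * delta > 0:  the clock was stepped back, say by 5s, and this timer
 *      fired before clock_was_set() could fix it up.  It is 5s early in
 *      wall-clock terms, so it is pushed out by tstojiffie(5s) jiffies,
 *      re-queued with add_timer(), and do_notify is cleared so no
 *      signal is sent on this expiry.
 */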
@@ -397,6 +528,7 @@ static struct k_itimer * alloc_posix_timer(void)
if (!tmr)
return tmr;
memset(tmr, 0, sizeof (struct k_itimer));
INIT_LIST_HEAD(&tmr->abs_timer_entry);
if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
kmem_cache_free(posix_timers_cache, tmr);
tmr = NULL;
@@ -644,8 +776,7 @@ do_timer_gettime(struct k_itimer *timr, struct itimerspec *cur_setting)
if (expires) {
if (timr->it_requeue_pending & REQUEUE_PENDING ||
(timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
-                        while (posix_time_before(&timr->it_timer, &now))
-                                posix_bump_timer(timr);
+                        posix_bump_timer(timr, now);
expires = timr->it_timer.expires;
}
else
@@ -721,11 +852,10 @@ sys_timer_getoverrun(timer_t timer_id)
* time to it to get the proper time for the timer.
*/
static int adjust_abs_time(struct k_clock *clock, struct timespec *tp,
-                           int abs, u64 *exp)
+                           int abs, u64 *exp, struct timespec *wall_to)
{
struct timespec now;
struct timespec oc = *tp;
-        struct timespec wall_to_mono;
u64 jiffies_64_f;
int rtn =0;
@@ -733,15 +863,15 @@ static int adjust_abs_time(struct k_clock *clock, struct timespec *tp,
/*
* The mask picks up the 4 basic clocks
*/
-        if (!(clock - &posix_clocks[0]) & ~CLOCKS_MASK) {
+        if (!((clock - &posix_clocks[0]) & ~CLOCKS_MASK)) {
jiffies_64_f = do_posix_clock_monotonic_gettime_parts(
-                        &now, &wall_to_mono);
+                        &now, wall_to);
/*
* If we are doing a MONOTONIC clock
*/
if((clock - &posix_clocks[0]) & CLOCKS_MONO){
-                now.tv_sec += wall_to_mono.tv_sec;
-                now.tv_nsec += wall_to_mono.tv_nsec;
+                now.tv_sec += wall_to->tv_sec;
+                now.tv_nsec += wall_to->tv_nsec;
}
} else {
/*
@@ -831,6 +961,8 @@ do_timer_settime(struct k_itimer *timr, int flags,
#else
del_timer(&timr->it_timer);
#endif
remove_from_abslist(timr);
timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
~REQUEUE_PENDING;
timr->it_overrun_last = 0;
@@ -845,24 +977,25 @@ do_timer_settime(struct k_itimer *timr, int flags,
if (adjust_abs_time(clock,
&new_setting->it_value, flags & TIMER_ABSTIME,
-                            &expire_64)) {
+                            &expire_64, &(timr->wall_to_prev))) {
return -EINVAL;
}
timr->it_timer.expires = (unsigned long)expire_64;
tstojiffie(&new_setting->it_interval, clock->res, &expire_64);
timr->it_incr = (unsigned long)expire_64;
/*
-         * For some reason the timer does not fire immediately if expires is
-         * equal to jiffies, so the timer notify function is called directly.
-         * We do not even queue SIGEV_NONE timers!
+         * We do not even queue SIGEV_NONE timers!  But we do put them
+         * in the abs list so we can do that right.
*/
-        if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)) {
-                if (timr->it_timer.expires == jiffies)
-                        timer_notify_task(timr);
-                else
-                        add_timer(&timr->it_timer);
-        }
+        if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE))
+                add_timer(&timr->it_timer);
if (flags & TIMER_ABSTIME && clock->abs_struct) {
spin_lock(&clock->abs_struct->lock);
list_add_tail(&(timr->abs_timer_entry),
&(clock->abs_struct->list));
spin_unlock(&clock->abs_struct->lock);
}
return 0;
}
@@ -896,7 +1029,7 @@ sys_timer_settime(timer_t timer_id, int flags,
if (!posix_clocks[timr->it_clock].timer_set)
error = do_timer_settime(timr, flags, &new_spec, rtn);
else
error = posix_clocks[timr->it_clock].timer_set(timr,
flags,
&new_spec, rtn);
unlock_timer(timr, flag);
@@ -929,6 +1062,8 @@ static inline int do_timer_delete(struct k_itimer *timer)
#else
del_timer(&timer->it_timer);
#endif
remove_from_abslist(timer);
return 0;
}
@@ -1171,13 +1306,93 @@ static void nanosleep_wake_up(unsigned long __data)
* On locking, clock_was_set() is called from update_wall_clock which
* holds (or has held for it) a write_lock_irq( xtime_lock) and is
* called from the timer bh code. Thus we need the irq save locks.
*
* Also, on the call from update_wall_clock, that is done as part of a
* softirq thing. We don't want to delay the system that much (possibly
* long list of timers to fix), so we defer that work to keventd.
*/
static DECLARE_WAIT_QUEUE_HEAD(nanosleep_abs_wqueue);
static DECLARE_WORK(clock_was_set_work, (void(*)(void*))clock_was_set, NULL);
static DECLARE_MUTEX(clock_was_set_lock);
void clock_was_set(void)
{
struct k_itimer *timr;
struct timespec new_wall_to;
LIST_HEAD(cws_list);
unsigned long seq;
if (unlikely(in_interrupt())) {
schedule_work(&clock_was_set_work);
return;
}
wake_up_all(&nanosleep_abs_wqueue);
/*
* Check if there exist TIMER_ABSTIME timers to correct.
*
* Notes on locking: This code is run in task context with irq
* on. We CAN be interrupted! All other usage of the abs list
* lock is under the timer lock which holds the irq lock as
* well. We REALLY don't want to scan the whole list with the
* interrupt system off, AND we would like a sequence lock on
* this code as well. Since we assume that the clock will not
* be set often, it seems ok to take and release the irq lock
* for each timer. In fact add_timer will do this, so this is
* not an issue.  So that we know when we are done, we will move the
* whole list to a new location. Then as we process each entry,
* we will move it to the actual list again. This way, when our
* copy is empty, we are done. We are not all that concerned
* about preemption so we will use a semaphore lock to protect
* against reentry.  This way we will not stall another
* processor. It is possible that this may delay some timers
* that should have expired, given the new clock, but even this
* will be minimal as we will always update to the current time,
* even if it was set by a task that is waiting for entry to
* this code. Timers that expire too early will be caught by
* the expire code and restarted.
* Absolute timers that repeat are left in the abs list while
* waiting for the task to pick up the signal. This means we
* may find timers that are not in the "add_timer" list, but are
* in the abs list. We do the same thing for these, save
* putting them back in the "add_timer" list. (Note, these are
* left in the abs list mainly to indicate that they are
* ABSOLUTE timers, a fact that is used by the re-arm code, and
* for which we have no other flag.)
*/
down(&clock_was_set_lock);
spin_lock_irq(&abs_list.lock);
list_splice_init(&abs_list.list, &cws_list);
spin_unlock_irq(&abs_list.lock);
do {
do {
seq = read_seqbegin(&xtime_lock);
new_wall_to = wall_to_monotonic;
} while (read_seqretry(&xtime_lock, seq));
spin_lock_irq(&abs_list.lock);
if (list_empty(&cws_list)) {
spin_unlock_irq(&abs_list.lock);
break;
}
timr = list_entry(cws_list.next, struct k_itimer,
abs_timer_entry);
list_del_init(&timr->abs_timer_entry);
if (add_clockset_delta(timr, &new_wall_to) &&
del_timer(&timr->it_timer)) /* timer run yet? */
add_timer(&timr->it_timer);
list_add(&timr->abs_timer_entry, &abs_list.list);
spin_unlock_irq(&abs_list.lock);
} while (1);
up(&clock_was_set_lock);
}
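Reduced to a skeleton, the drain strategy described in the comment above
(illustrative sketch only, not additional kernel code):

        LIST_HEAD(drain);                       /* private copy of the list */

        spin_lock_irq(&abs_list.lock);
        list_splice_init(&abs_list.list, &drain);
        spin_unlock_irq(&abs_list.lock);

        for (;;) {
                spin_lock_irq(&abs_list.lock);  /* short irq-off window */
                if (list_empty(&drain)) {
                        spin_unlock_irq(&abs_list.lock);
                        break;
                }
                /* detach one entry, adjust it, move it back to
                 * abs_list.list, then drop the lock and repeat */
                spin_unlock_irq(&abs_list.lock);
        }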
long clock_nanosleep_restart(struct restart_block *restart_block);
@@ -1220,7 +1435,7 @@ sys_clock_nanosleep(clockid_t which_clock, int flags,
long
do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave)
{
-        struct timespec t;
+        struct timespec t, dum;
struct timer_list new_timer;
DECLARE_WAITQUEUE(abs_wqueue, current);
u64 rq_time = (u64)0;
@@ -1260,7 +1475,7 @@ do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave)
t = *tsave;
if (abs || !rq_time) {
adjust_abs_time(&posix_clocks[which_clock], &t, abs,
-                                &rq_time);
+                                &rq_time, &dum);
rq_time += (t.tv_sec || t.tv_nsec);
}
......
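Finally, a user-space view of the nanosleep side of the change (hypothetical
sketch, not from the patch; links with -lrt):

#include <stdio.h>
#include <time.h>

int main(void)
{
        struct timespec deadline;

        clock_gettime(CLOCK_REALTIME, &deadline);
        deadline.tv_sec += 3600;        /* absolute deadline, one hour out */

        /* With this patch, if a privileged process steps CLOCK_REALTIME
         * past the deadline (clock_settime(), date -s, ...), the sleeper
         * is woken and returns promptly; before it, the kernel slept a
         * fixed number of jiffies computed when the call was made.
         */
        clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &deadline, NULL);
        printf("deadline reached or clock stepped past it\n");
        return 0;
}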