timer.c 33.3 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 *  linux/kernel/timer.c
 *
 *  Kernel internal timers, kernel timekeeping, basic process system calls
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
 *
 *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
 *              "A Kernel Model for Precision Timekeeping" by Dave Mills
 *  1998-12-24  Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
 *              serialize accesses to xtime/lost_ticks).
 *                              Copyright (C) 1998  Andrea Arcangeli
 *  1999-03-10  Improved NTP compatibility by Ulrich Windl
Robert Love's avatar
Robert Love committed
16
 *  2002-05-31	Move sys_sysinfo here and make its locking sane, Robert Love
17 18 19
 *  2000-10-05  Implemented scalable SMP per-CPU timer handling.
 *                              Copyright (C) 2000, 2001, 2002  Ingo Molnar
 *              Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
Linus Torvalds's avatar
Linus Torvalds committed
20 21 22
 */

#include <linux/kernel_stat.h>
23 24 25 26
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
27
#include <linux/swap.h>
28
#include <linux/notifier.h>
29
#include <linux/thread_info.h>
30
#include <linux/time.h>
31
#include <linux/jiffies.h>
32
#include <linux/cpu.h>
Linus Torvalds's avatar
Linus Torvalds committed
33 34

#include <asm/uaccess.h>
35
#include <asm/div64.h>
36
#include <asm/timex.h>
Linus Torvalds's avatar
Linus Torvalds committed
37 38

/*
39
 * per-CPU timer vector definitions:
Linus Torvalds's avatar
Linus Torvalds committed
40 41 42 43 44 45 46 47
 */
/*
 * Timer-wheel geometry: the root vector (tv1) has TVR_SIZE (256)
 * one-jiffy slots; each outer vector (tv2..tv5) has TVN_SIZE (64)
 * slots, every level 64x coarser-grained than the one below it.
 */
#define TVN_BITS 6
#define TVR_BITS 8
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

48
/*
 * One level of the timer wheel: an array of list heads, one list of
 * pending timers per slot.  tvec_t is a non-root (coarse) level,
 * tvec_root_t is the one-jiffy-resolution root level.
 */
typedef struct tvec_s {
	struct list_head vec[TVN_SIZE];
} tvec_t;

typedef struct tvec_root_s {
	struct list_head vec[TVR_SIZE];
} tvec_root_t;

/*
 * Per-CPU timer wheel.  ->lock protects all fields.  ->timer_jiffies
 * is the next jiffy whose timers have not yet been run (it trails the
 * global jiffies while timers are being caught up).  ->running_timer
 * is only maintained under CONFIG_SMP (see set_running_timer()).
 */
struct tvec_t_base_s {
	spinlock_t lock;
	unsigned long timer_jiffies;
	struct timer_list *running_timer;
	tvec_root_t tv1;
	tvec_t tv2;
	tvec_t tv3;
	tvec_t tv4;
	tvec_t tv5;
} ____cacheline_aligned_in_smp;

typedef struct tvec_t_base_s tvec_base_t;

Andrew Morton's avatar
Andrew Morton committed
69 70 71 72 73 74 75 76
static inline void set_running_timer(tvec_base_t *base,
					struct timer_list *timer)
{
#ifdef CONFIG_SMP
	base->running_timer = timer;
#endif
}

77 78
/* Fake initialization */
static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED };
Linus Torvalds's avatar
Linus Torvalds committed
79

80
static void check_timer_failed(struct timer_list *timer)
Andrew Morton's avatar
Andrew Morton committed
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
{
	static int whine_count;
	if (whine_count < 16) {
		whine_count++;
		printk("Uninitialised timer!\n");
		printk("This is just a warning.  Your computer is OK\n");
		printk("function=0x%p, data=0x%lx\n",
			timer->function, timer->data);
		dump_stack();
	}
	/*
	 * Now fix it up
	 */
	spin_lock_init(&timer->lock);
	timer->magic = TIMER_MAGIC;
}

98
static inline void check_timer(struct timer_list *timer)
Andrew Morton's avatar
Andrew Morton committed
99 100 101 102 103
{
	if (timer->magic != TIMER_MAGIC)
		check_timer_failed(timer);
}

104

Andrew Morton's avatar
Andrew Morton committed
105
/*
 * Insert @timer into @base's wheel according to how far in the future
 * ->expires is: near timers land in the one-jiffy-resolution tv1,
 * progressively more distant ones in the coarser tv2..tv5 vectors
 * (they are re-sorted by cascade() as time advances).
 * Caller must hold base->lock.
 */
static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
{
	unsigned long expires = timer->expires;
	unsigned long idx = expires - base->timer_jiffies;
	struct list_head *vec;

	if (idx < TVR_SIZE) {
		/* within the root level: slot indexed by the low bits */
		int i = expires & TVR_MASK;
		vec = base->tv1.vec + i;
	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
		int i = (expires >> TVR_BITS) & TVN_MASK;
		vec = base->tv2.vec + i;
	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
		vec = base->tv3.vec + i;
	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
		vec = base->tv4.vec + i;
	} else if ((signed long) idx < 0) {
		/*
		 * Can happen if you add a timer with expires == jiffies,
		 * or you set a timer to go off in the past
		 */
		vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
	} else if (idx <= 0xffffffffUL) {
		int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
		vec = base->tv5.vec + i;
	} else {
		/* Can only get here on architectures with 64-bit jiffies */
		INIT_LIST_HEAD(&timer->entry);
		return;
	}
	/*
	 * Timers are FIFO:
	 */
	list_add_tail(&timer->entry, vec);
}

Ingo Molnar's avatar
Ingo Molnar committed
143 144 145 146 147 148 149 150 151 152 153 154 155 156
/***
 * add_timer - start a timer
 * @timer: the timer to be added
 *
 * The kernel will do a ->function(->data) callback from the
 * timer interrupt at the ->expires point in the future. The
 * current time is 'jiffies'.
 *
 * The timer's ->expires, ->function (and if the handler uses it, ->data)
 * fields must be set prior calling this function.
 *
 * Timers with an ->expires field in the past will be executed in the next
 * timer tick. It's illegal to add an already pending timer.
 */
void add_timer(struct timer_list *timer)
{
	/* get_cpu_var disables preemption, pinning the timer to this CPU */
	tvec_base_t *base = &get_cpu_var(tvec_bases);
	unsigned long flags;

	BUG_ON(timer_pending(timer) || !timer->function);

	check_timer(timer);

	spin_lock_irqsave(&base->lock, flags);
	internal_add_timer(base, timer);
	timer->base = base;
	spin_unlock_irqrestore(&base->lock, flags);
	put_cpu_var(tvec_bases);
}

173 174 175 176 177 178 179 180 181
/***
 * add_timer_on - start a timer on a particular CPU
 * @timer: the timer to be added
 * @cpu: the CPU to start it on
 *
 * This is not very scalable on SMP.
 */
void add_timer_on(struct timer_list *timer, int cpu)
{
	/* unlike add_timer(), the base is chosen by @cpu, not the caller's CPU */
	tvec_base_t *base = &per_cpu(tvec_bases, cpu);
	unsigned long flags;

	BUG_ON(timer_pending(timer) || !timer->function);

	check_timer(timer);

	spin_lock_irqsave(&base->lock, flags);
	internal_add_timer(base, timer);
	timer->base = base;
	spin_unlock_irqrestore(&base->lock, flags);
}

Ingo Molnar's avatar
Ingo Molnar committed
195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
/***
 * mod_timer - modify a timer's timeout
 * @timer: the timer to be modified
 *
 * mod_timer is a more efficient way to update the expire field of an
 * active timer (if the timer is inactive it will be activated)
 *
 * mod_timer(timer, expires) is equivalent to:
 *
 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 *
 * Note that if there are multiple unserialized concurrent users of the
 * same timer, then mod_timer() is the only safe way to modify the timeout,
 * since add_timer() cannot modify an already running timer.
 *
 * The function returns whether it has modified a pending timer or not.
 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
 * active timer returns 1.)
 */
int mod_timer(struct timer_list *timer, unsigned long expires)
{
	tvec_base_t *old_base, *new_base;
	unsigned long flags;
	int ret = 0;

	BUG_ON(!timer->function);

	check_timer(timer);

	/*
	 * This is a common optimization triggered by the
	 * networking code - if the timer is re-modified
	 * to be the same thing then just return:
	 */
	if (timer->expires == expires && timer_pending(timer))
		return 1;

	/* timer->lock keeps timer->base stable against concurrent movers */
	spin_lock_irqsave(&timer->lock, flags);
	new_base = &__get_cpu_var(tvec_bases);
repeat:
	old_base = timer->base;

	/*
	 * Prevent deadlocks via ordering by old_base < new_base.
	 */
	if (old_base && (new_base != old_base)) {
		if (old_base < new_base) {
			spin_lock(&new_base->lock);
			spin_lock(&old_base->lock);
		} else {
			spin_lock(&old_base->lock);
			spin_lock(&new_base->lock);
		}
		/*
		 * The timer base might have been cancelled while we were
		 * trying to take the lock(s):
		 */
		if (timer->base != old_base) {
			spin_unlock(&new_base->lock);
			spin_unlock(&old_base->lock);
			goto repeat;
		}
	} else
		spin_lock(&new_base->lock);

	/*
	 * Delete the previous timeout (if there was any), and install
	 * the new one:
	 */
	if (old_base) {
		list_del(&timer->entry);
		ret = 1;
	}
	timer->expires = expires;
	internal_add_timer(new_base, timer);
	timer->base = new_base;

	if (old_base && (new_base != old_base))
		spin_unlock(&old_base->lock);
	spin_unlock(&new_base->lock);
	spin_unlock_irqrestore(&timer->lock, flags);

	return ret;
}

Ingo Molnar's avatar
Ingo Molnar committed
280 281 282 283 284 285 286 287 288 289 290
/***
 * del_timer - deactive a timer.
 * @timer: the timer to be deactivated
 *
 * del_timer() deactivates a timer - this works on both active and inactive
 * timers.
 *
 * The function returns whether it has deactivated a pending timer or not.
 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
 * active timer returns 1.)
 */
int del_timer(struct timer_list *timer)
{
	unsigned long flags;
	tvec_base_t *base;

	check_timer(timer);

repeat:
	base = timer->base;
	/* not pending: nothing to delete */
	if (!base)
		return 0;
	spin_lock_irqsave(&base->lock, flags);
	/* the timer may have migrated to another base before we got the lock */
	if (base != timer->base) {
		spin_unlock_irqrestore(&base->lock, flags);
		goto repeat;
	}
	list_del(&timer->entry);
	timer->base = NULL;
	spin_unlock_irqrestore(&base->lock, flags);

	return 1;
}

#ifdef CONFIG_SMP
Ingo Molnar's avatar
Ingo Molnar committed
315 316 317 318 319 320 321 322 323 324 325 326 327 328
/***
 * del_timer_sync - deactivate a timer and wait for the handler to finish.
 * @timer: the timer to be deactivated
 *
 * This function only differs from del_timer() on SMP: besides deactivating
 * the timer it also makes sure the handler has finished executing on other
 * CPUs.
 *
 * Synchronization rules: callers must prevent restarting of the timer,
 * otherwise this function is meaningless. It must not be called from
 * interrupt contexts. Upon exit the timer is not queued and the handler
 * is not running on any CPU.
 *
 * The function returns whether it has deactivated a pending timer or not.
 */
int del_timer_sync(struct timer_list *timer)
{
	tvec_base_t *base;
	int i, ret = 0;

	check_timer(timer);

del_again:
	/* accumulate deletions: the handler may re-arm the timer meanwhile */
	ret += del_timer(timer);

	for (i = 0; i < NR_CPUS; i++) {
		if (!cpu_online(i))
			continue;

		base = &per_cpu(tvec_bases, i);
		if (base->running_timer == timer) {
			/* busy-wait for the handler to finish on that CPU */
			while (base->running_timer == timer) {
				cpu_relax();
				preempt_check_resched();
			}
			break;
		}
	}
	/* the handler may have requeued the timer: delete it again */
	if (timer_pending(timer))
		goto del_again;

	return ret;
}
#endif


Andrew Morton's avatar
Andrew Morton committed
361
/*
 * Move every timer out of slot @index of level @tv and re-insert it
 * via internal_add_timer(), which sorts it into a finer-grained level
 * now that it is closer to expiry.  Returns @index so __run_timers()
 * can chain cascades.  Caller must hold base->lock.
 */
static int cascade(tvec_base_t *base, tvec_t *tv, int index)
{
	/* cascade all the timers from tv up one level */
	struct list_head *head, *curr;

	head = tv->vec + index;
	curr = head->next;
	/*
	 * We are removing _all_ timers from the list, so we don't  have to
	 * detach them individually, just clear the list afterwards.
	 */
	while (curr != head) {
		struct timer_list *tmp;

		tmp = list_entry(curr, struct timer_list, entry);
		BUG_ON(tmp->base != base);
		/* advance before re-insertion relinks tmp->entry */
		curr = curr->next;
		internal_add_timer(base, tmp);
	}
	INIT_LIST_HEAD(head);

	return index;
}

Ingo Molnar's avatar
Ingo Molnar committed
385 386 387 388 389 390 391
/***
 * __run_timers - run all expired timers (if any) on this CPU.
 * @base: the timer vector to be processed.
 *
 * This function cascades all vectors and executes all expired timer
 * vectors.
 */

/* slot of level N+2 (tv2..tv5) that base->timer_jiffies falls into */
#define INDEX(N) (base->timer_jiffies >> (TVR_BITS + N * TVN_BITS)) & TVN_MASK

static inline void __run_timers(tvec_base_t *base)
{
	struct timer_list *timer;

	spin_lock_irq(&base->lock);
	/* catch up one jiffy at a time until we reach the present */
	while (time_after_eq(jiffies, base->timer_jiffies)) {
		struct list_head work_list = LIST_HEAD_INIT(work_list);
		struct list_head *head = &work_list;
		int index = base->timer_jiffies & TVR_MASK;

		/*
		 * Cascade timers:
		 *
		 * When the root index wraps to 0, pull one slot of tv2
		 * down; if that tv2 slot also wrapped, pull from tv3,
		 * and so on (cascade() returns 0 exactly on wrap).
		 */
		if (!index &&
			(!cascade(base, &base->tv2, INDEX(0))) &&
				(!cascade(base, &base->tv3, INDEX(1))) &&
					!cascade(base, &base->tv4, INDEX(2)))
			cascade(base, &base->tv5, INDEX(3));
		++base->timer_jiffies;
		list_splice_init(base->tv1.vec + index, &work_list);
repeat:
		if (!list_empty(head)) {
			void (*fn)(unsigned long);
			unsigned long data;

			timer = list_entry(head->next,struct timer_list,entry);
			fn = timer->function;
			data = timer->data;

			list_del(&timer->entry);
			timer->base = NULL;
			set_running_timer(base, timer);
			/* run the handler unlocked so it may re-add timers */
			spin_unlock_irq(&base->lock);
			fn(data);
			spin_lock_irq(&base->lock);
			goto repeat;
		}
	}
	set_running_timer(base, NULL);
	spin_unlock_irq(&base->lock);
}

436
/******************************************************************/
Linus Torvalds's avatar
Linus Torvalds committed
437

438 439 440
/*
 * Timekeeping variables
 */
unsigned long tick_usec = TICK_USEC; 		/* USER_HZ period (usec) */
unsigned long tick_nsec = TICK_NSEC;		/* ACTHZ period (nsec) */

/* 
 * The current time 
 * wall_to_monotonic is what we need to add to xtime (or xtime corrected 
 * for sub jiffie times) to get to monotonic time.  Monotonic is pegged
 * at zero at system boot time, so wall_to_monotonic will be negative,
 * however, we will ALWAYS keep the tv_nsec part positive so we can use
 * the usual normalization.
 */
struct timespec xtime __attribute__ ((aligned (16)));
struct timespec wall_to_monotonic __attribute__ ((aligned (16)));

/* Don't completely fail for HZ > 500.  */
int tickadj = 500/HZ ? : 1;		/* microsecs */


/*
 * phase-lock loop variables
 */
/* TIME_ERROR prevents overwriting the CMOS clock */
int time_state = TIME_OK;		/* clock synchronization status	*/
int time_status = STA_UNSYNC;		/* clock status bits		*/
long time_offset;			/* time adjustment (us)		*/
long time_constant = 2;			/* pll time constant		*/
long time_tolerance = MAXFREQ;		/* frequency tolerance (ppm)	*/
long time_precision = 1;		/* clock precision (us)		*/
long time_maxerror = NTP_PHASE_LIMIT;	/* maximum error (us)		*/
long time_esterror = NTP_PHASE_LIMIT;	/* estimated error (us)		*/
long time_phase;			/* phase offset (scaled us)	*/
long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC;
					/* frequency offset (scaled ppm)*/
long time_adj;				/* tick adjust (scaled 1 / HZ)	*/
long time_reftime;			/* time at last adjustment (s)	*/
long time_adjust;			/* adjtime() correction left (us) */

Linus Torvalds's avatar
Linus Torvalds committed
478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518
/*
 * this routine handles the overflow of the microsecond field
 *
 * The tricky bits of code to handle the accurate clock support
 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 * They were originally developed for SUN and DEC kernels.
 * All the kudos should go to Dave for this stuff.
 *
 * Runs once per second from update_wall_time(); the caller already
 * holds xtime_lock (see update_times()).
 */
static void second_overflow(void)
{
    long ltemp;

    /* Bump the maxerror field */
    time_maxerror += time_tolerance >> SHIFT_USEC;
    if ( time_maxerror > NTP_PHASE_LIMIT ) {
	time_maxerror = NTP_PHASE_LIMIT;
	time_status |= STA_UNSYNC;
    }

    /*
     * Leap second processing. If in leap-insert state at
     * the end of the day, the system clock is set back one
     * second; if in leap-delete state, the system clock is
     * set ahead one second. The microtime() routine or
     * external clock driver will insure that reported time
     * is always monotonic. The ugly divides should be
     * replaced.
     */
    switch (time_state) {

    case TIME_OK:
	if (time_status & STA_INS)
	    time_state = TIME_INS;
	else if (time_status & STA_DEL)
	    time_state = TIME_DEL;
	break;

    case TIME_INS:
	if (xtime.tv_sec % 86400 == 0) {
	    xtime.tv_sec--;
	    wall_to_monotonic.tv_sec++;
	    time_interpolator_update(-NSEC_PER_SEC);
	    time_state = TIME_OOP;
	    clock_was_set();
	    printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
	}
	break;

    case TIME_DEL:
	if ((xtime.tv_sec + 1) % 86400 == 0) {
	    xtime.tv_sec++;
	    wall_to_monotonic.tv_sec--;
	    time_interpolator_update(NSEC_PER_SEC);
	    time_state = TIME_WAIT;
	    clock_was_set();
	    printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
	}
	break;

    case TIME_OOP:
	time_state = TIME_WAIT;
	break;

    case TIME_WAIT:
	if (!(time_status & (STA_INS | STA_DEL)))
	    time_state = TIME_OK;
    }

    /*
     * Compute the phase adjustment for the next second. In
     * PLL mode, the offset is reduced by a fixed factor
     * times the time constant. In FLL mode the offset is
     * used directly. In either mode, the maximum phase
     * adjustment for each second is clamped so as to spread
     * the adjustment over not more than the number of
     * seconds between updates.
     */
    if (time_offset < 0) {
	ltemp = -time_offset;
	if (!(time_status & STA_FLL))
	    ltemp >>= SHIFT_KG + time_constant;
	if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
	    ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
	time_offset += ltemp;
	time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
    } else {
	ltemp = time_offset;
	if (!(time_status & STA_FLL))
	    ltemp >>= SHIFT_KG + time_constant;
	if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
	    ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
	time_offset -= ltemp;
	time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
    }

    /*
     * Compute the frequency estimate and additional phase
     * adjustment due to frequency error for the next
     * second. When the PPS signal is engaged, gnaw on the
     * watchdog counter and update the frequency computed by
     * the pll and the PPS signal.
     */
    pps_valid++;
    if (pps_valid == PPS_VALID) {	/* PPS signal lost */
	pps_jitter = MAXTIME;
	pps_stabil = MAXFREQ;
	time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
			 STA_PPSWANDER | STA_PPSERROR);
    }
    ltemp = time_freq + pps_freq;
    if (ltemp < 0)
	time_adj -= -ltemp >>
	    (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
    else
	time_adj += ltemp >>
	    (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);

#if HZ == 100
    /* Compensate for (HZ==100) != (1 << SHIFT_HZ).
     * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
     */
    if (time_adj < 0)
	time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
    else
	time_adj += (time_adj >> 2) + (time_adj >> 5);
#endif
}

/* in the NTP reference this is called "hardclock()" */
/*
 * Advance xtime.tv_nsec by one tick's worth of nanoseconds, applying
 * any pending adjtime() step and the NTP phase adjustment (time_adj).
 * Caller holds xtime_lock (see update_times()).
 */
static void update_wall_time_one_tick(void)
{
	long time_adjust_step, delta_nsec;

	if ( (time_adjust_step = time_adjust) != 0 ) {
	    /* We are doing an adjtime thing. 
	     *
	     * Prepare time_adjust_step to be within bounds.
	     * Note that a positive time_adjust means we want the clock
	     * to run faster.
	     *
	     * Limit the amount of the step to be in the range
	     * -tickadj .. +tickadj
	     */
	     if (time_adjust > tickadj)
		time_adjust_step = tickadj;
	     else if (time_adjust < -tickadj)
		time_adjust_step = -tickadj;

	    /* Reduce by this step the amount of time left  */
	    time_adjust -= time_adjust_step;
	}
	/* time_adjust_step is in microseconds; convert to nanoseconds */
	delta_nsec = tick_nsec + time_adjust_step * 1000;
	/*
	 * Advance the phase, once it gets to one microsecond, then
	 * advance the tick more.
	 */
	time_phase += time_adj;
	if (time_phase <= -FINENSEC) {
		long ltemp = -time_phase >> (SHIFT_SCALE - 10);
		time_phase += ltemp << (SHIFT_SCALE - 10);
		delta_nsec -= ltemp;
	}
	else if (time_phase >= FINENSEC) {
		long ltemp = time_phase >> (SHIFT_SCALE - 10);
		time_phase -= ltemp << (SHIFT_SCALE - 10);
		delta_nsec += ltemp;
	}
	xtime.tv_nsec += delta_nsec;
	time_interpolator_update(delta_nsec);
}

/*
 * Using a loop looks inefficient, but "ticks" is
 * usually just one (we shouldn't be losing ticks,
 * we're doing this this way mainly for interrupt
 * latency reasons, not because we think we'll
 * have lots of lost timer ticks
 */
static void update_wall_time(unsigned long ticks)
{
	do {
		ticks--;
		update_wall_time_one_tick();
	} while (ticks);

664 665
	if (xtime.tv_nsec >= 1000000000) {
	    xtime.tv_nsec -= 1000000000;
Linus Torvalds's avatar
Linus Torvalds committed
666
	    xtime.tv_sec++;
667
	    time_interpolator_update(NSEC_PER_SEC);
Linus Torvalds's avatar
Linus Torvalds committed
668 669 670 671 672 673 674 675 676
	    second_overflow();
	}
}

/*
 * Charge @user and @system ticks to @p's accumulated CPU times and
 * enforce the RLIMIT_CPU soft (SIGXCPU) and hard (SIGKILL) limits.
 */
static inline void do_process_times(struct task_struct *p,
	unsigned long user, unsigned long system)
{
	unsigned long psecs;

	/* total accumulated CPU time, in ticks, after this charge */
	psecs = (p->utime += user);
	psecs += (p->stime += system);
	if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) {
		/* Send SIGXCPU every second.. */
		if (!(psecs % HZ))
			send_sig(SIGXCPU, p, 1);
		/* and SIGKILL when we go over max.. */
		if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max)
			send_sig(SIGKILL, p, 1);
	}
}

/*
 * Tick down @p's ITIMER_VIRTUAL countdown by @ticks; when it hits
 * zero, reload it from the configured interval and raise SIGVTALRM.
 * A zero countdown means the virtual timer is disarmed.
 */
static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
{
	unsigned long remaining = p->it_virt_value;

	if (!remaining)
		return;

	remaining -= ticks;
	if (!remaining) {
		remaining = p->it_virt_incr;
		send_sig(SIGVTALRM, p, 1);
	}
	p->it_virt_value = remaining;
}

/*
 * Tick down @p's ITIMER_PROF countdown by one; when it reaches zero,
 * reload it from the configured interval and raise SIGPROF.  A zero
 * countdown means the profiling timer is disarmed.
 */
static inline void do_it_prof(struct task_struct *p)
{
	unsigned long remaining = p->it_prof_value;

	if (!remaining)
		return;

	remaining--;
	if (remaining == 0) {
		remaining = p->it_prof_incr;
		send_sig(SIGPROF, p, 1);
	}
	p->it_prof_value = remaining;
}

/*
 * Account @user/@system ticks to @p and run its virtual and profiling
 * interval timers.  @cpu is accepted for the caller's convenience but
 * is not used in this function.
 */
void update_one_process(struct task_struct *p, unsigned long user,
			unsigned long system, int cpu)
{
	do_process_times(p, user, system);
	do_it_virt(p, user);
	do_it_prof(p);
}

/*
 * Called from the timer interrupt handler to charge one tick to the current 
 * process.  user_tick is 1 if the tick is user time, 0 for system.
 */
void update_process_times(int user_tick)
{
	struct task_struct *p = current;
	/* exactly one of user_tick/system is 1: system = !user_tick */
	int cpu = smp_processor_id(), system = user_tick ^ 1;

	update_one_process(p, user_tick, system, cpu);
	run_local_timers();
	scheduler_tick(user_tick, system);
}

/*
 * Nr of active tasks - counted in fixed-point numbers
 */
static unsigned long count_active_tasks(void)
{
743
	return (nr_running() + nr_uninterruptible()) * FIXED_1;
Linus Torvalds's avatar
Linus Torvalds committed
744 745 746 747 748 749 750
}

/*
 * Hmm.. Changed this, as the GNU make sources (load.c) seems to
 * imply that avenrun[] is the standard name for this kind of thing.
 * Nothing else seems to be standardized: the fractional size etc
 * all seem to differ on different machines.
 *
 * Requires xtime_lock to access.
 */
unsigned long avenrun[3];

/*
 * calc_load - given tick count, update the avenrun load estimates.
 * This is called while holding a write_lock on xtime_lock.
 */
static inline void calc_load(unsigned long ticks)
{
	unsigned long active_tasks; /* fixed-point */
	static int count = LOAD_FREQ;	/* ticks until the next recalculation */

	count -= ticks;
	if (count < 0) {
		count += LOAD_FREQ;
		active_tasks = count_active_tasks();
		/* exponentially-decayed 1-, 5- and 15-minute averages */
		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
	}
}

/* jiffies at the most recent update of wall time */
unsigned long wall_jiffies = INITIAL_JIFFIES;

/*
 * This read-write spinlock protects us from races in SMP while
 * playing with xtime and avenrun.
 *
 * An architecture may provide its own xtime_lock by defining
 * ARCH_HAVE_XTIME_LOCK, in which case this definition is skipped.
 */
#ifndef ARCH_HAVE_XTIME_LOCK
seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
#endif
Linus Torvalds's avatar
Linus Torvalds committed
785

786
/*
 * This function runs timers and the timer-tq in bottom half context.
 */
static void run_timer_softirq(struct softirq_action *h)
{
	tvec_base_t *base = &__get_cpu_var(tvec_bases);

	/* only take the locked path if some jiffy still has unrun timers */
	if (time_after_eq(jiffies, base->timer_jiffies))
		__run_timers(base);
}

/*
 * Called by the local, per-CPU timer interrupt on SMP.
 */
void run_local_timers(void)
{
	/* defer the real work to run_timer_softirq() in softirq context */
	raise_softirq(TIMER_SOFTIRQ);
}

/*
 * Called by the timer interrupt. xtime_lock must already be taken
 * by the timer IRQ!
 */
Linus Torvalds's avatar
Linus Torvalds committed
809 810 811 812 813 814 815 816 817 818 819
static inline void update_times(void)
{
	unsigned long ticks;

	ticks = jiffies - wall_jiffies;
	if (ticks) {
		wall_jiffies += ticks;
		update_wall_time(ticks);
	}
	calc_load(ticks);
}
820 821 822
  
/*
 * The 64-bit jiffies value is not atomic - you MUST NOT read it
823
 * without sampling the sequence number in xtime_lock.
824 825
 * jiffies is defined in the linker script...
 */
Linus Torvalds's avatar
Linus Torvalds committed
826 827 828

void do_timer(struct pt_regs *regs)
{
829
	jiffies_64++;
Linus Torvalds's avatar
Linus Torvalds committed
830 831 832 833 834
#ifndef CONFIG_SMP
	/* SMP process accounting uses the local APIC timer */

	update_process_times(user_mode(regs));
#endif
835
	update_times();
Linus Torvalds's avatar
Linus Torvalds committed
836 837 838 839
}

#if !defined(__alpha__) && !defined(__ia64__)

840 841
extern int do_setitimer(int, struct itimerval *, struct itimerval *);

Linus Torvalds's avatar
Linus Torvalds committed
842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870
/*
 * For backwards compatibility?  This can be done in libc so Alpha
 * and all newer ports shouldn't need it.
 */
asmlinkage unsigned long sys_alarm(unsigned int seconds)
{
	struct itimerval it_new, it_old;
	unsigned int oldalarm;

	it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
	it_new.it_value.tv_sec = seconds;
	it_new.it_value.tv_usec = 0;
	do_setitimer(ITIMER_REAL, &it_new, &it_old);
	oldalarm = it_old.it_value.tv_sec;
	/* ehhh.. We can't return 0 if we have an alarm pending.. */
	/* And we'd better return too much than too little anyway */
	if (it_old.it_value.tv_usec)
		oldalarm++;
	return oldalarm;
}

#endif

#ifndef __alpha__

/*
 * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
 * should be moved into arch/i386 instead?
 */
Robert Love's avatar
Robert Love committed
871 872 873 874 875 876 877 878 879 880

/**
 * sys_getpid - return the thread group id of the current process
 *
 * Note, despite the name, this returns the tgid not the pid.  The tgid and
 * the pid are identical unless CLONE_THREAD was specified on clone() in
 * which case the tgid is the same in all threads of the same group.
 *
 * This is SMP safe as current->tgid does not change.
 */
Linus Torvalds's avatar
Linus Torvalds committed
881 882 883 884 885 886
asmlinkage long sys_getpid(void)
{
	return current->tgid;
}

/*
Ingo Molnar's avatar
Ingo Molnar committed
887 888
 * Accessing ->group_leader->real_parent is not SMP-safe, it could
 * change from under us. However, rather than getting any lock
Linus Torvalds's avatar
Linus Torvalds committed
889 890 891 892 893 894 895
 * we can use an optimistic algorithm: get the parent
 * pid, and go back and check that the parent is still
 * the same. If it has changed (which is extremely unlikely
 * indeed), we just try again..
 *
 * NOTE! This depends on the fact that even if we _do_
 * get an old value of "parent", we can happily dereference
Ingo Molnar's avatar
Ingo Molnar committed
896 897
 * the pointer (it was and remains a dereferencable kernel pointer
 * no matter what): we just can't necessarily trust the result
Linus Torvalds's avatar
Linus Torvalds committed
898 899
 * until we know that the parent pointer is valid.
 *
Ingo Molnar's avatar
Ingo Molnar committed
900
 * NOTE2: ->group_leader never changes from under us.
Linus Torvalds's avatar
Linus Torvalds committed
901 902 903 904
 */
asmlinkage long sys_getppid(void)
{
	int pid;
Ingo Molnar's avatar
Ingo Molnar committed
905 906
	struct task_struct *me = current;
	struct task_struct *parent;
Linus Torvalds's avatar
Linus Torvalds committed
907

Ingo Molnar's avatar
Ingo Molnar committed
908
	parent = me->group_leader->real_parent;
Linus Torvalds's avatar
Linus Torvalds committed
909
	for (;;) {
Ingo Molnar's avatar
Ingo Molnar committed
910
		pid = parent->tgid;
911
#ifdef CONFIG_SMP
Linus Torvalds's avatar
Linus Torvalds committed
912 913
{
		struct task_struct *old = parent;
Ingo Molnar's avatar
Ingo Molnar committed
914 915 916 917 918 919 920

		/*
		 * Make sure we read the pid before re-reading the
		 * parent pointer:
		 */
		rmb();
		parent = me->group_leader->real_parent;
Linus Torvalds's avatar
Linus Torvalds committed
921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955
		if (old != parent)
			continue;
}
#endif
		break;
	}
	return pid;
}

asmlinkage long sys_getuid(void)
{
	/* Only the current task ever writes its own uid, so this is SMP safe. */
	return current->uid;
}

asmlinkage long sys_geteuid(void)
{
	/* Only the current task ever writes its own euid, so this is SMP safe. */
	return current->euid;
}

asmlinkage long sys_getgid(void)
{
	/* Only the current task ever writes its own gid, so this is SMP safe. */
	return current->gid;
}

asmlinkage long sys_getegid(void)
{
	/* Only the current task ever writes its own egid, so this is SMP safe. */
	return current->egid;
}

#endif

Linus Torvalds's avatar
Linus Torvalds committed
956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988
static void process_timeout(unsigned long __data)
{
	wake_up_process((task_t *)__data);
}

/**
 * schedule_timeout - sleep until timeout
 * @timeout: timeout value in jiffies
 *
 * Make the current task sleep until @timeout jiffies have
 * elapsed. The routine will return immediately unless
 * the current task state has been set (see set_current_state()).
 *
 * You can set the task state as follows -
 *
 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
 * pass before the routine returns. The routine will return 0
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task. In this case the remaining time
 * in jiffies will be returned, or 0 if the timer expired in time
 *
 * The current task state is guaranteed to be TASK_RUNNING when this
 * routine returns.
 *
 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
 * the CPU away without a bound on the timeout. In this case the return
 * value will be %MAX_SCHEDULE_TIMEOUT.
 *
 * In all cases the return value is guaranteed to be non-negative.
 */
signed long schedule_timeout(signed long timeout)
{
989
	struct timer_list timer;
Linus Torvalds's avatar
Linus Torvalds committed
990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038
	unsigned long expire;

	switch (timeout)
	{
	case MAX_SCHEDULE_TIMEOUT:
		/*
		 * These two special cases are useful to be comfortable
		 * in the caller. Nothing more. We could take
		 * MAX_SCHEDULE_TIMEOUT from one of the negative value
		 * but I' d like to return a valid offset (>=0) to allow
		 * the caller to do everything it want with the retval.
		 */
		schedule();
		goto out;
	default:
		/*
		 * Another bit of PARANOID. Note that the retval will be
		 * 0 since no piece of kernel is supposed to do a check
		 * for a negative retval of schedule_timeout() (since it
		 * should never happens anyway). You just have the printk()
		 * that will tell you if something is gone wrong and where.
		 */
		if (timeout < 0)
		{
			printk(KERN_ERR "schedule_timeout: wrong timeout "
			       "value %lx from %p\n", timeout,
			       __builtin_return_address(0));
			current->state = TASK_RUNNING;
			goto out;
		}
	}

	expire = timeout + jiffies;

	init_timer(&timer);
	timer.expires = expire;
	timer.data = (unsigned long) current;
	timer.function = process_timeout;

	add_timer(&timer);
	schedule();
	del_timer_sync(&timer);

	timeout = expire - jiffies;

 out:
	return timeout < 0 ? 0 : timeout;
}

Linus Torvalds's avatar
Linus Torvalds committed
1039 1040 1041 1042 1043
/* Thread ID - the internal kernel "pid" */
asmlinkage long sys_gettid(void)
{
	return current->pid;
}
1044
#ifndef FOLD_NANO_SLEEP_INTO_CLOCK_NANO_SLEEP
Linus Torvalds's avatar
Linus Torvalds committed
1045

1046
static long nanosleep_restart(struct restart_block *restart)
Linus Torvalds's avatar
Linus Torvalds committed
1047
{
1048 1049 1050
	unsigned long expire = restart->arg0, now = jiffies;
	struct timespec *rmtp = (struct timespec *) restart->arg1;
	long ret;
Linus Torvalds's avatar
Linus Torvalds committed
1051

1052 1053 1054
	/* Did it expire while we handled signals? */
	if (!time_after(expire, now))
		return 0;
Linus Torvalds's avatar
Linus Torvalds committed
1055 1056

	current->state = TASK_INTERRUPTIBLE;
1057
	expire = schedule_timeout(expire - now);
Linus Torvalds's avatar
Linus Torvalds committed
1058

1059
	ret = 0;
Linus Torvalds's avatar
Linus Torvalds committed
1060
	if (expire) {
1061 1062 1063 1064
		struct timespec t;
		jiffies_to_timespec(expire, &t);

		ret = -ERESTART_RESTARTBLOCK;
1065
		if (rmtp && copy_to_user(rmtp, &t, sizeof(t)))
1066 1067
			ret = -EFAULT;
		/* The 'restart' block is already filled in */
Linus Torvalds's avatar
Linus Torvalds committed
1068
	}
1069
	return ret;
Linus Torvalds's avatar
Linus Torvalds committed
1070 1071
}

1072 1073 1074
asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
{
	struct timespec t;
1075
	unsigned long expire;
1076 1077 1078 1079 1080
	long ret;

	if (copy_from_user(&t, rqtp, sizeof(t)))
		return -EFAULT;

1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091
	if ((t.tv_nsec >= 1000000000L) || (t.tv_nsec < 0) || (t.tv_sec < 0))
		return -EINVAL;

	expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
	current->state = TASK_INTERRUPTIBLE;
	expire = schedule_timeout(expire);

	ret = 0;
	if (expire) {
		struct restart_block *restart;
		jiffies_to_timespec(expire, &t);
1092
		if (rmtp && copy_to_user(rmtp, &t, sizeof(t)))
1093
			return -EFAULT;
1094 1095 1096 1097 1098 1099

		restart = &current_thread_info()->restart_block;
		restart->fn = nanosleep_restart;
		restart->arg0 = jiffies + expire;
		restart->arg1 = (unsigned long) rmtp;
		ret = -ERESTART_RESTARTBLOCK;
1100 1101 1102
	}
	return ret;
}
1103
#endif // ! FOLD_NANO_SLEEP_INTO_CLOCK_NANO_SLEEP
1104

Robert Love's avatar
Robert Love committed
1105 1106 1107
/*
 * sys_sysinfo - fill in sysinfo struct
 */ 
1108
asmlinkage long sys_sysinfo(struct sysinfo __user *info)
Robert Love's avatar
Robert Love committed
1109 1110 1111 1112
{
	struct sysinfo val;
	unsigned long mem_total, sav_total;
	unsigned int mem_unit, bitcount;
1113
	unsigned long seq;
Robert Love's avatar
Robert Love committed
1114 1115 1116

	memset((char *)&val, 0, sizeof(struct sysinfo));

1117
	do {
1118
		struct timespec tp;
1119 1120
		seq = read_seqbegin(&xtime_lock);

1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136
		/*
		 * This is annoying.  The below is the same thing
		 * posix_get_clock_monotonic() does, but it wants to
		 * take the lock which we want to cover the loads stuff
		 * too.
		 */

		do_gettimeofday((struct timeval *)&tp);
		tp.tv_nsec *= NSEC_PER_USEC;
		tp.tv_sec += wall_to_monotonic.tv_sec;
		tp.tv_nsec += wall_to_monotonic.tv_nsec;
		if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
			tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
			tp.tv_sec++;
		}
		val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
Robert Love's avatar
Robert Love committed
1137

1138 1139 1140
		val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
		val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
		val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
Robert Love's avatar
Robert Love committed
1141

1142 1143
		val.procs = nr_threads;
	} while (read_seqretry(&xtime_lock, seq));
Robert Love's avatar
Robert Love committed
1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187

	si_meminfo(&val);
	si_swapinfo(&val);

	/*
	 * If the sum of all the available memory (i.e. ram + swap)
	 * is less than can be stored in a 32 bit unsigned long then
	 * we can be binary compatible with 2.2.x kernels.  If not,
	 * well, in that case 2.2.x was broken anyways...
	 *
	 *  -Erik Andersen <andersee@debian.org>
	 */

	mem_total = val.totalram + val.totalswap;
	if (mem_total < val.totalram || mem_total < val.totalswap)
		goto out;
	bitcount = 0;
	mem_unit = val.mem_unit;
	while (mem_unit > 1) {
		bitcount++;
		mem_unit >>= 1;
		sav_total = mem_total;
		mem_total <<= 1;
		if (mem_total < sav_total)
			goto out;
	}

	/*
	 * If mem_total did not overflow, multiply all memory values by
	 * val.mem_unit and set it to 1.  This leaves things compatible
	 * with 2.2.x, and also retains compatibility with earlier 2.4.x
	 * kernels...
	 */

	val.mem_unit = 1;
	val.totalram <<= bitcount;
	val.freeram <<= bitcount;
	val.sharedram <<= bitcount;
	val.bufferram <<= bitcount;
	val.totalswap <<= bitcount;
	val.freeswap <<= bitcount;
	val.totalhigh <<= bitcount;
	val.freehigh <<= bitcount;

1188
 out:
Robert Love's avatar
Robert Love committed
1189 1190 1191 1192 1193
	if (copy_to_user(info, &val, sizeof(struct sysinfo)))
		return -EFAULT;

	return 0;
}
1194

1195
/* Initialise the per-CPU timer wheel for @cpu. */
static void __devinit init_timers_cpu(int cpu)
{
	tvec_base_t *base = &per_cpu(tvec_bases, cpu);
	int i;

	spin_lock_init(&base->lock);

	/* The four outer wheels each have TVN_SIZE slots... */
	for (i = 0; i < TVN_SIZE; i++) {
		INIT_LIST_HEAD(base->tv5.vec + i);
		INIT_LIST_HEAD(base->tv4.vec + i);
		INIT_LIST_HEAD(base->tv3.vec + i);
		INIT_LIST_HEAD(base->tv2.vec + i);
	}
	/* ...the root wheel has TVR_SIZE. */
	for (i = 0; i < TVR_SIZE; i++)
		INIT_LIST_HEAD(base->tv1.vec + i);

	base->timer_jiffies = jiffies;
}
	
/* CPU-hotplug callback: prepare timer state for a CPU coming up. */
static int __devinit timer_cpu_notify(struct notifier_block *self,
				unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;

	if (action == CPU_UP_PREPARE)
		init_timers_cpu(cpu);
	return NOTIFY_OK;
}

/* CPU-hotplug notifier: initialises per-CPU timer vectors via timer_cpu_notify(). */
static struct notifier_block __devinitdata timers_nb = {
	.notifier_call	= timer_cpu_notify,
};


void __init init_timers(void)
{
	/* Set up the boot CPU by hand, then let hotplug cover the rest. */
	timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
				(void *)(long)smp_processor_id());
	register_cpu_notifier(&timers_nb);
	open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL);
}
1240 1241 1242 1243 1244 1245 1246

#ifdef CONFIG_TIME_INTERPOLATION
volatile unsigned long last_nsec_offset;
#ifndef __HAVE_ARCH_CMPXCHG
spinlock_t last_nsec_offset_lock = SPIN_LOCK_UNLOCKED;
#endif

1247
struct time_interpolator *time_interpolator;
Andrew Morton's avatar
Andrew Morton committed
1248 1249
static struct time_interpolator *time_interpolator_list;
static spinlock_t time_interpolator_lock = SPIN_LOCK_UNLOCKED;
1250 1251 1252 1253 1254 1255

static inline int
is_better_time_interpolator(struct time_interpolator *new)
{
	if (!time_interpolator)
		return 1;
1256 1257
	return new->frequency > 2*time_interpolator->frequency ||
	    (unsigned long)new->drift < (unsigned long)time_interpolator->drift;
1258 1259 1260 1261 1262
}

void
register_time_interpolator(struct time_interpolator *ti)
{
	spin_lock(&time_interpolator_lock);

	/* Promote the newcomer if it beats the current best. */
	write_seqlock_irq(&xtime_lock);
	if (is_better_time_interpolator(ti))
		time_interpolator = ti;
	write_sequnlock_irq(&xtime_lock);

	/* Push onto the head of the registration list. */
	ti->next = time_interpolator_list;
	time_interpolator_list = ti;

	spin_unlock(&time_interpolator_lock);
}

void
unregister_time_interpolator(struct time_interpolator *ti)
{
	struct time_interpolator *t, **pp;

	spin_lock(&time_interpolator_lock);

	/* Unlink @ti from the singly-linked registration list. */
	pp = &time_interpolator_list;
	for (t = *pp; t; t = t->next) {
		if (t == ti) {
			*pp = t->next;
			break;
		}
		pp = &t->next;
	}

	write_seqlock_irq(&xtime_lock);
	if (ti == time_interpolator) {
		/* we lost the best time-interpolator: */
		time_interpolator = NULL;
		/* find the next-best interpolator */
		for (t = time_interpolator_list; t; t = t->next)
			if (is_better_time_interpolator(t))
				time_interpolator = t;
	}
	write_sequnlock_irq(&xtime_lock);

	spin_unlock(&time_interpolator_lock);
}
1301
#endif /* CONFIG_TIME_INTERPOLATION */