Commit b9877c90, authored by Tim Schmielau, committed by Linus Torvalds

[PATCH] Fix reporting of process start times

Derive process start times from the posix_clock_monotonic notion of uptime
instead of "jiffies", consistent with the earlier change to /proc/uptime
itself.
(http://linus.bkbits.net:8080/linux-2.5/cset@3ef4851dGg0fxX58R9Zv8SIq9fzNmQ?nav=index.html|src/.|src/fs|src/fs/proc|related/fs/proc/proc_misc.c)

Process start times are reported to userspace in units of 1/USER_HZ since
boot, so applications such as procps need the value of "uptime" to convert
them into absolute times.

Currently "uptime" is derived from an ntp-corrected time base, but process
start time is derived from the free-running "jiffies" counter.  This
results in inaccurate, drifting process start times as seen by the user,
even if the exported number stays constant, because the user's notion of
"jiffies" changes over time.

It's John Stultz's patch anyways, which I only messed up a bit, but since
people started trading signed-off lines on lkml:
Signed-off-by: Tim Schmielau <tim@physik3.uni-rostock.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 1a9c15ac
......@@ -360,7 +360,11 @@ int proc_pid_stat(struct task_struct *task, char * buffer)
read_unlock(&tasklist_lock);
/* Temporary variable needed for gcc-2.96 */
start_time = jiffies_64_to_clock_t(task->start_time - INITIAL_JIFFIES);
/* convert timespec -> nsec*/
start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC
+ task->start_time.tv_nsec;
/* convert nsec -> ticks */
start_time = nsec_to_clock_t(start_time);
res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
%lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
......
......@@ -172,15 +172,22 @@ static inline u32 jiffies_to_AHZ(unsigned long x)
#endif
}
static inline u64 jiffies_64_to_AHZ(u64 x)
static inline u64 nsec_to_AHZ(u64 x)
{
#if (TICK_NSEC % (NSEC_PER_SEC / AHZ)) == 0
#if HZ != AHZ
do_div(x, HZ / AHZ);
#endif
#else
x *= TICK_NSEC;
#if (NSEC_PER_SEC % AHZ) == 0
do_div(x, (NSEC_PER_SEC / AHZ));
#elif (AHZ % 512) == 0
x *= AHZ/512;
do_div(x, (NSEC_PER_SEC / 512));
#else
/*
* max relative error 5.7e-8 (1.8s per year) for AHZ <= 1024,
* overflow after 64.99 years.
* exact for AHZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ...
*/
x *= 9;
do_div(x, (unsigned long)((9ull * NSEC_PER_SEC + (AHZ/2))
/ AHZ));
#endif
return x;
}
......
......@@ -508,7 +508,7 @@ struct task_struct {
struct timer_list real_timer;
unsigned long utime, stime;
unsigned long nvcsw, nivcsw; /* context switch counts */
u64 start_time;
struct timespec start_time;
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
unsigned long min_flt, maj_flt;
/* process credentials */
......
......@@ -55,6 +55,26 @@ static inline u64 jiffies_64_to_clock_t(u64 x)
}
#endif
/*
 * nsec_to_clock_t - convert a nanosecond count into USER_HZ ticks.
 * @x: time value in nanoseconds
 *
 * Returns x scaled by USER_HZ / NSEC_PER_SEC. The scaling strategy is
 * picked at compile time from the value of USER_HZ; each branch relies
 * on do_div() leaving the quotient in x, which is then returned.
 */
static inline u64 nsec_to_clock_t(u64 x)
{
#if (NSEC_PER_SEC % USER_HZ) == 0
/* USER_HZ divides NSEC_PER_SEC evenly: one division by ns-per-tick. */
do_div(x, (NSEC_PER_SEC / USER_HZ));
#elif (USER_HZ % 512) == 0
/*
 * USER_HZ is a multiple of 512: reduce multiplier and divisor by 512
 * (presumably exact because NSEC_PER_SEC is 10^9 = 512 * 5^9 - confirm).
 */
x *= USER_HZ/512;
do_div(x, (NSEC_PER_SEC / 512));
#else
/*
* max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024,
* overflow after 64.99 years.
* exact for USER_HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ...
*/
/* Scale by 9, then divide by 9 * NSEC_PER_SEC / USER_HZ rounded to nearest. */
x *= 9;
do_div(x, (unsigned long)((9ull * NSEC_PER_SEC + (USER_HZ/2))
/ USER_HZ));
#endif
return x;
}
struct tms {
clock_t tms_utime;
clock_t tms_stime;
......
......@@ -384,6 +384,8 @@ static void do_acct_process(long exitcode, struct file *file)
unsigned long vsize;
unsigned long flim;
u64 elapsed;
u64 run_time;
struct timespec uptime;
/*
* First check to see if there is enough free_space to continue
......@@ -401,7 +403,13 @@ static void do_acct_process(long exitcode, struct file *file)
ac.ac_version = ACCT_VERSION | ACCT_BYTEORDER;
strlcpy(ac.ac_comm, current->comm, sizeof(ac.ac_comm));
elapsed = jiffies_64_to_AHZ(get_jiffies_64() - current->start_time);
/* calculate run_time in nsec*/
do_posix_clock_monotonic_gettime(&uptime);
run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec;
run_time -= (u64)current->start_time.tv_sec*NSEC_PER_SEC
+ current->start_time.tv_nsec;
/* convert nsec -> AHZ */
elapsed = nsec_to_AHZ(run_time);
#if ACCT_VERSION==3
ac.ac_etime = encode_float(elapsed);
#else
......
......@@ -992,7 +992,7 @@ static task_t *copy_process(unsigned long clone_flags,
p->utime = p->stime = 0;
p->lock_depth = -1; /* -1 = no lock */
p->start_time = get_jiffies_64();
do_posix_clock_monotonic_gettime(&p->start_time);
p->security = NULL;
p->io_context = NULL;
p->io_wait = NULL;
......
......@@ -26,6 +26,7 @@
/**
* oom_badness - calculate a numeric value for how bad this task has been
* @p: task struct of which task we should calculate
* @uptime: current uptime in seconds
*
* The formula used is relatively simple and documented inline in the
* function. The main rationale is that we want to select a good task
......@@ -41,7 +42,7 @@
* of least surprise ... (be careful when you change it)
*/
static unsigned long badness(struct task_struct *p)
static unsigned long badness(struct task_struct *p, unsigned long uptime)
{
unsigned long points, cpu_time, run_time, s;
......@@ -56,12 +57,16 @@ static unsigned long badness(struct task_struct *p)
points = p->mm->total_vm;
/*
* CPU time is in seconds and run time is in minutes. There is no
* particular reason for this other than that it turned out to work
* very well in practice.
* CPU time is in tens of seconds and run time is in thousands
* of seconds. There is no particular reason for this other than
* that it turned out to work very well in practice.
*/
cpu_time = (p->utime + p->stime) >> (SHIFT_HZ + 3);
run_time = (get_jiffies_64() - p->start_time) >> (SHIFT_HZ + 10);
if (uptime >= p->start_time.tv_sec)
run_time = (uptime - p->start_time.tv_sec) >> 10;
else
run_time = 0;
s = int_sqrt(cpu_time);
if (s)
......@@ -111,10 +116,12 @@ static struct task_struct * select_bad_process(void)
unsigned long maxpoints = 0;
struct task_struct *g, *p;
struct task_struct *chosen = NULL;
struct timespec uptime;
do_posix_clock_monotonic_gettime(&uptime);
do_each_thread(g, p)
if (p->pid) {
unsigned long points = badness(p);
unsigned long points = badness(p, uptime.tv_sec);
if (points > maxpoints) {
chosen = p;
maxpoints = points;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment