Commit 71a2224d authored by Christoph Lameter, committed by Linus Torvalds

[PATCH] Optimize sys_times for a single thread process

Avoid taking the tasklist_lock in sys_times if the process is single
threaded.  In a NUMA system taking the tasklist_lock may cause a bouncing
cacheline if multiple independent processes continually call sys_times to
measure their performance.
Signed-off-by: Christoph Lameter <christoph@lameter.com>
Signed-off-by: Shai Fultheim <shai@scalex86.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent b030a4dd
...@@ -72,6 +72,11 @@ void release_task(struct task_struct * p) ...@@ -72,6 +72,11 @@ void release_task(struct task_struct * p)
BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
__exit_signal(p); __exit_signal(p);
__exit_sighand(p); __exit_sighand(p);
/*
* Note that the fastpath in sys_times depends on __exit_signal having
* updated the counters before a task is removed from the tasklist of
* the process by __unhash_process.
*/
__unhash_process(p); __unhash_process(p);
/* /*
......
...@@ -894,35 +894,69 @@ asmlinkage long sys_times(struct tms __user * tbuf) ...@@ -894,35 +894,69 @@ asmlinkage long sys_times(struct tms __user * tbuf)
*/ */
if (tbuf) { if (tbuf) {
struct tms tmp; struct tms tmp;
struct task_struct *tsk = current;
struct task_struct *t;
cputime_t utime, stime, cutime, cstime; cputime_t utime, stime, cutime, cstime;
read_lock(&tasklist_lock); #ifdef CONFIG_SMP
utime = tsk->signal->utime; if (thread_group_empty(current)) {
stime = tsk->signal->stime; /*
t = tsk; * Single thread case without the use of any locks.
do { *
utime = cputime_add(utime, t->utime); * We may race with release_task if two threads are
stime = cputime_add(stime, t->stime); * executing. However, release task first adds up the
t = next_thread(t); * counters (__exit_signal) before removing the task
} while (t != tsk); * from the process tasklist (__unhash_process).
* __exit_signal also acquires and releases the
/* * siglock which results in the proper memory ordering
* While we have tasklist_lock read-locked, no dying thread * so that the list modifications are always visible
* can be updating current->signal->[us]time. Instead, * after the counters have been updated.
* we got their counts included in the live thread loop. *
* However, another thread can come in right now and * If the counters have been updated by the second thread
* do a wait call that updates current->signal->c[us]time. * but the thread has not yet been removed from the list
* To make sure we always see that pair updated atomically, * then the other branch will be executing which will
* we take the siglock around fetching them. * block on tasklist_lock until the exit handling of the
*/ * other task is finished.
spin_lock_irq(&tsk->sighand->siglock); *
cutime = tsk->signal->cutime; * This also implies that the sighand->siglock cannot
cstime = tsk->signal->cstime; * be held by another processor. So we can also
spin_unlock_irq(&tsk->sighand->siglock); * skip acquiring that lock.
read_unlock(&tasklist_lock); */
utime = cputime_add(current->signal->utime, current->utime);
stime = cputime_add(current->signal->utime, current->stime);
cutime = current->signal->cutime;
cstime = current->signal->cstime;
} else
#endif
{
/* Process with multiple threads */
struct task_struct *tsk = current;
struct task_struct *t;
read_lock(&tasklist_lock);
utime = tsk->signal->utime;
stime = tsk->signal->stime;
t = tsk;
do {
utime = cputime_add(utime, t->utime);
stime = cputime_add(stime, t->stime);
t = next_thread(t);
} while (t != tsk);
/*
* While we have tasklist_lock read-locked, no dying thread
* can be updating current->signal->[us]time. Instead,
* we got their counts included in the live thread loop.
* However, another thread can come in right now and
* do a wait call that updates current->signal->c[us]time.
* To make sure we always see that pair updated atomically,
* we take the siglock around fetching them.
*/
spin_lock_irq(&tsk->sighand->siglock);
cutime = tsk->signal->cutime;
cstime = tsk->signal->cstime;
spin_unlock_irq(&tsk->sighand->siglock);
read_unlock(&tasklist_lock);
}
tmp.tms_utime = cputime_to_clock_t(utime); tmp.tms_utime = cputime_to_clock_t(utime);
tmp.tms_stime = cputime_to_clock_t(stime); tmp.tms_stime = cputime_to_clock_t(stime);
tmp.tms_cutime = cputime_to_clock_t(cutime); tmp.tms_cutime = cputime_to_clock_t(cutime);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment