Commit 297c5d92 authored by Andrea Righi, committed by Linus Torvalds

task IO accounting: provide distinct tgid/tid I/O statistics

Report per-thread I/O statistics in /proc/pid/task/tid/io and aggregate
parent I/O statistics in /proc/pid/io.  This approach follows the same
model used to account per-process and per-thread CPU times.

As a practical application, this makes it possible, for example, to quickly
find the top I/O consumer when a process spawns many child threads that
perform the actual I/O work, because the aggregated I/O statistics can
always be found in /proc/pid/io.

[ Oleg Nesterov points out that we should check that the task is still
  alive before we iterate over the threads, but also says that we can do
  that fixup on top of this later.  - Linus ]
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrea Righi <righi.andrea@gmail.com>
Cc: Matt Heaton <matt@hostmonster.com>
Cc: Shailabh Nagar <nagar@watson.ibm.com>
Acked-by-with-comments: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 0c18d7a5
...@@ -2376,29 +2376,82 @@ static int proc_base_fill_cache(struct file *filp, void *dirent, ...@@ -2376,29 +2376,82 @@ static int proc_base_fill_cache(struct file *filp, void *dirent,
} }
#ifdef CONFIG_TASK_IO_ACCOUNTING #ifdef CONFIG_TASK_IO_ACCOUNTING
static int proc_pid_io_accounting(struct task_struct *task, char *buffer) static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
{ {
u64 rchar, wchar, syscr, syscw;
struct task_io_accounting ioac;
if (!whole) {
rchar = task->rchar;
wchar = task->wchar;
syscr = task->syscr;
syscw = task->syscw;
memcpy(&ioac, &task->ioac, sizeof(ioac));
} else {
unsigned long flags;
struct task_struct *t = task;
rchar = wchar = syscr = syscw = 0;
memset(&ioac, 0, sizeof(ioac));
rcu_read_lock();
do {
rchar += t->rchar;
wchar += t->wchar;
syscr += t->syscr;
syscw += t->syscw;
ioac.read_bytes += t->ioac.read_bytes;
ioac.write_bytes += t->ioac.write_bytes;
ioac.cancelled_write_bytes +=
t->ioac.cancelled_write_bytes;
t = next_thread(t);
} while (t != task);
rcu_read_unlock();
if (lock_task_sighand(task, &flags)) {
struct signal_struct *sig = task->signal;
rchar += sig->rchar;
wchar += sig->wchar;
syscr += sig->syscr;
syscw += sig->syscw;
ioac.read_bytes += sig->ioac.read_bytes;
ioac.write_bytes += sig->ioac.write_bytes;
ioac.cancelled_write_bytes +=
sig->ioac.cancelled_write_bytes;
unlock_task_sighand(task, &flags);
}
}
return sprintf(buffer, return sprintf(buffer,
#ifdef CONFIG_TASK_XACCT
"rchar: %llu\n" "rchar: %llu\n"
"wchar: %llu\n" "wchar: %llu\n"
"syscr: %llu\n" "syscr: %llu\n"
"syscw: %llu\n" "syscw: %llu\n"
#endif
"read_bytes: %llu\n" "read_bytes: %llu\n"
"write_bytes: %llu\n" "write_bytes: %llu\n"
"cancelled_write_bytes: %llu\n", "cancelled_write_bytes: %llu\n",
#ifdef CONFIG_TASK_XACCT (unsigned long long)rchar,
(unsigned long long)task->rchar, (unsigned long long)wchar,
(unsigned long long)task->wchar, (unsigned long long)syscr,
(unsigned long long)task->syscr, (unsigned long long)syscw,
(unsigned long long)task->syscw, (unsigned long long)ioac.read_bytes,
#endif (unsigned long long)ioac.write_bytes,
(unsigned long long)task->ioac.read_bytes, (unsigned long long)ioac.cancelled_write_bytes);
(unsigned long long)task->ioac.write_bytes, }
(unsigned long long)task->ioac.cancelled_write_bytes);
static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
{
return do_io_accounting(task, buffer, 0);
} }
#endif
/*
 * Thread-group flavour of the "io" proc handler.
 *
 * Forwards to do_io_accounting() with whole = 1, which selects the branch
 * that sums the counters of every thread reachable through next_thread()
 * and then adds the totals stored in task->signal. The per-thread variant
 * passes whole = 0 and reports only the given task's own counters.
 *
 * Presumably this is the function behind the INF("io", ...,
 * tgid_io_accounting) entry in tgid_base_stuff[], i.e. /proc/<pid>/io;
 * the INF() macro is not visible here, so confirm the name mapping.
 *
 * Returns the value returned by do_io_accounting(), which is the result
 * of its sprintf() into buffer.
 */
static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
{
return do_io_accounting(task, buffer, 1);
}
#endif /* CONFIG_TASK_IO_ACCOUNTING */
/* /*
* Thread groups * Thread groups
...@@ -2470,7 +2523,7 @@ static const struct pid_entry tgid_base_stuff[] = { ...@@ -2470,7 +2523,7 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
#endif #endif
#ifdef CONFIG_TASK_IO_ACCOUNTING #ifdef CONFIG_TASK_IO_ACCOUNTING
INF("io", S_IRUGO, pid_io_accounting), INF("io", S_IRUGO, tgid_io_accounting),
#endif #endif
}; };
...@@ -2797,6 +2850,9 @@ static const struct pid_entry tid_base_stuff[] = { ...@@ -2797,6 +2850,9 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_FAULT_INJECTION #ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
#endif #endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
INF("io", S_IRUGO, tid_io_accounting),
#endif
}; };
static int proc_tid_base_readdir(struct file * filp, static int proc_tid_base_readdir(struct file * filp,
......
...@@ -506,6 +506,10 @@ struct signal_struct { ...@@ -506,6 +506,10 @@ struct signal_struct {
unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
unsigned long inblock, oublock, cinblock, coublock; unsigned long inblock, oublock, cinblock, coublock;
#ifdef CONFIG_TASK_XACCT
u64 rchar, wchar, syscr, syscw;
#endif
struct task_io_accounting ioac;
/* /*
* Cumulative ns of scheduled CPU time for dead threads in the * Cumulative ns of scheduled CPU time for dead threads in the
......
...@@ -120,6 +120,18 @@ static void __exit_signal(struct task_struct *tsk) ...@@ -120,6 +120,18 @@ static void __exit_signal(struct task_struct *tsk)
sig->nivcsw += tsk->nivcsw; sig->nivcsw += tsk->nivcsw;
sig->inblock += task_io_get_inblock(tsk); sig->inblock += task_io_get_inblock(tsk);
sig->oublock += task_io_get_oublock(tsk); sig->oublock += task_io_get_oublock(tsk);
#ifdef CONFIG_TASK_XACCT
sig->rchar += tsk->rchar;
sig->wchar += tsk->wchar;
sig->syscr += tsk->syscr;
sig->syscw += tsk->syscw;
#endif /* CONFIG_TASK_XACCT */
#ifdef CONFIG_TASK_IO_ACCOUNTING
sig->ioac.read_bytes += tsk->ioac.read_bytes;
sig->ioac.write_bytes += tsk->ioac.write_bytes;
sig->ioac.cancelled_write_bytes +=
tsk->ioac.cancelled_write_bytes;
#endif /* CONFIG_TASK_IO_ACCOUNTING */
sig->sum_sched_runtime += tsk->se.sum_exec_runtime; sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
sig = NULL; /* Marker for below. */ sig = NULL; /* Marker for below. */
} }
...@@ -1366,6 +1378,21 @@ static int wait_task_zombie(struct task_struct *p, int options, ...@@ -1366,6 +1378,21 @@ static int wait_task_zombie(struct task_struct *p, int options,
psig->coublock += psig->coublock +=
task_io_get_oublock(p) + task_io_get_oublock(p) +
sig->oublock + sig->coublock; sig->oublock + sig->coublock;
#ifdef CONFIG_TASK_XACCT
psig->rchar += p->rchar + sig->rchar;
psig->wchar += p->wchar + sig->wchar;
psig->syscr += p->syscr + sig->syscr;
psig->syscw += p->syscw + sig->syscw;
#endif /* CONFIG_TASK_XACCT */
#ifdef CONFIG_TASK_IO_ACCOUNTING
psig->ioac.read_bytes +=
p->ioac.read_bytes + sig->ioac.read_bytes;
psig->ioac.write_bytes +=
p->ioac.write_bytes + sig->ioac.write_bytes;
psig->ioac.cancelled_write_bytes +=
p->ioac.cancelled_write_bytes +
sig->ioac.cancelled_write_bytes;
#endif /* CONFIG_TASK_IO_ACCOUNTING */
spin_unlock_irq(&p->parent->sighand->siglock); spin_unlock_irq(&p->parent->sighand->siglock);
} }
......
...@@ -812,6 +812,12 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) ...@@ -812,6 +812,12 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
#ifdef CONFIG_TASK_XACCT
sig->rchar = sig->wchar = sig->syscr = sig->syscw = 0;
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
memset(&sig->ioac, 0, sizeof(sig->ioac));
#endif
sig->sum_sched_runtime = 0; sig->sum_sched_runtime = 0;
INIT_LIST_HEAD(&sig->cpu_timers[0]); INIT_LIST_HEAD(&sig->cpu_timers[0]);
INIT_LIST_HEAD(&sig->cpu_timers[1]); INIT_LIST_HEAD(&sig->cpu_timers[1]);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment