Commit 495789a5 authored by KOSAKI Motohiro's avatar KOSAKI Motohiro Committed by Linus Torvalds

oom: make oom_score to per-process value

The oom-killer kills a whole process, not a single task, so oom_score should be
calculated per-process as well.  This makes the behaviour more consistent and
speeds up select_bad_process().
Signed-off-by: default avatarKOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 28b83c51
...@@ -1205,7 +1205,7 @@ The following heuristics are then applied: ...@@ -1205,7 +1205,7 @@ The following heuristics are then applied:
* if the task was reniced, its score doubles * if the task was reniced, its score doubles
* superuser or direct hardware access tasks (CAP_SYS_ADMIN, CAP_SYS_RESOURCE * superuser or direct hardware access tasks (CAP_SYS_ADMIN, CAP_SYS_RESOURCE
or CAP_SYS_RAWIO) have their score divided by 4 or CAP_SYS_RAWIO) have their score divided by 4
* if oom condition happened in one cpuset and checked task does not belong * if oom condition happened in one cpuset and checked process does not belong
to it, its score is divided by 8 to it, its score is divided by 8
* the resulting score is multiplied by two to the power of oom_adj, i.e. * the resulting score is multiplied by two to the power of oom_adj, i.e.
points <<= oom_adj when it is positive and points <<= oom_adj when it is positive and
......
...@@ -447,7 +447,7 @@ static int proc_oom_score(struct task_struct *task, char *buffer) ...@@ -447,7 +447,7 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
do_posix_clock_monotonic_gettime(&uptime); do_posix_clock_monotonic_gettime(&uptime);
read_lock(&tasklist_lock); read_lock(&tasklist_lock);
points = badness(task, uptime.tv_sec); points = badness(task->group_leader, uptime.tv_sec);
read_unlock(&tasklist_lock); read_unlock(&tasklist_lock);
return sprintf(buffer, "%lu\n", points); return sprintf(buffer, "%lu\n", points);
} }
......
...@@ -34,6 +34,23 @@ int sysctl_oom_dump_tasks; ...@@ -34,6 +34,23 @@ int sysctl_oom_dump_tasks;
static DEFINE_SPINLOCK(zone_scan_lock); static DEFINE_SPINLOCK(zone_scan_lock);
/* #define DEBUG */ /* #define DEBUG */
/*
 * Do any of the target process's threads have memory-allocation nodes
 * (cpuset mems_allowed) that intersect with current's?  Returns 1 as
 * soon as one overlapping thread is found, 0 if none overlap.
 */
static int has_intersects_mems_allowed(struct task_struct *tsk)
{
	struct task_struct *iter = tsk;

	/* Walk the circular thread list until we come back to tsk. */
	for (;;) {
		if (cpuset_mems_allowed_intersects(current, iter))
			return 1;
		iter = next_thread(iter);
		if (iter == tsk)
			break;
	}
	return 0;
}
/** /**
* badness - calculate a numeric value for how bad this task has been * badness - calculate a numeric value for how bad this task has been
* @p: task struct of which task we should calculate * @p: task struct of which task we should calculate
...@@ -59,6 +76,9 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) ...@@ -59,6 +76,9 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
struct mm_struct *mm; struct mm_struct *mm;
struct task_struct *child; struct task_struct *child;
int oom_adj = p->signal->oom_adj; int oom_adj = p->signal->oom_adj;
struct task_cputime task_time;
unsigned long utime;
unsigned long stime;
if (oom_adj == OOM_DISABLE) if (oom_adj == OOM_DISABLE)
return 0; return 0;
...@@ -106,8 +126,11 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) ...@@ -106,8 +126,11 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
* of seconds. There is no particular reason for this other than * of seconds. There is no particular reason for this other than
* that it turned out to work very well in practice. * that it turned out to work very well in practice.
*/ */
cpu_time = (cputime_to_jiffies(p->utime) + cputime_to_jiffies(p->stime)) thread_group_cputime(p, &task_time);
>> (SHIFT_HZ + 3); utime = cputime_to_jiffies(task_time.utime);
stime = cputime_to_jiffies(task_time.stime);
cpu_time = (utime + stime) >> (SHIFT_HZ + 3);
if (uptime >= p->start_time.tv_sec) if (uptime >= p->start_time.tv_sec)
run_time = (uptime - p->start_time.tv_sec) >> 10; run_time = (uptime - p->start_time.tv_sec) >> 10;
...@@ -148,7 +171,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) ...@@ -148,7 +171,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
* because p may have allocated or otherwise mapped memory on * because p may have allocated or otherwise mapped memory on
* this node before. However it will be less likely. * this node before. However it will be less likely.
*/ */
if (!cpuset_mems_allowed_intersects(current, p)) if (!has_intersects_mems_allowed(p))
points /= 8; points /= 8;
/* /*
...@@ -204,13 +227,13 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist, ...@@ -204,13 +227,13 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
static struct task_struct *select_bad_process(unsigned long *ppoints, static struct task_struct *select_bad_process(unsigned long *ppoints,
struct mem_cgroup *mem) struct mem_cgroup *mem)
{ {
struct task_struct *g, *p; struct task_struct *p;
struct task_struct *chosen = NULL; struct task_struct *chosen = NULL;
struct timespec uptime; struct timespec uptime;
*ppoints = 0; *ppoints = 0;
do_posix_clock_monotonic_gettime(&uptime); do_posix_clock_monotonic_gettime(&uptime);
do_each_thread(g, p) { for_each_process(p) {
unsigned long points; unsigned long points;
/* /*
...@@ -263,7 +286,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints, ...@@ -263,7 +286,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
chosen = p; chosen = p;
*ppoints = points; *ppoints = points;
} }
} while_each_thread(g, p); }
return chosen; return chosen;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment