Commit 505970b9 authored by Paul Jackson, committed by Linus Torvalds

[PATCH] cpuset oom lock fix

The problem, reported in:

  http://bugzilla.kernel.org/show_bug.cgi?id=5859

and by various other email messages and lkml posts is that the cpuset hook
in the oom (out of memory) code can try to take a cpuset semaphore while
holding the tasklist_lock (a spinlock).

One must not sleep while holding a spinlock.

The fix seems easy enough - move the cpuset semaphore region outside the
tasklist_lock region.

This required a few lines of mechanism to implement.  The oom code where
the locking needs to be changed does not have access to the cpuset locks,
which are internal to kernel/cpuset.c only.  So I provided a couple more
cpuset interface routines, available to the rest of the kernel, which
simply take and drop the lock needed here (cpuset's callback_sem).
Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent ed68cb36
...@@ -48,6 +48,9 @@ extern void __cpuset_memory_pressure_bump(void); ...@@ -48,6 +48,9 @@ extern void __cpuset_memory_pressure_bump(void);
extern struct file_operations proc_cpuset_operations; extern struct file_operations proc_cpuset_operations;
extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer);
extern void cpuset_lock(void);
extern void cpuset_unlock(void);
#else /* !CONFIG_CPUSETS */ #else /* !CONFIG_CPUSETS */
static inline int cpuset_init_early(void) { return 0; } static inline int cpuset_init_early(void) { return 0; }
...@@ -93,6 +96,9 @@ static inline char *cpuset_task_status_allowed(struct task_struct *task, ...@@ -93,6 +96,9 @@ static inline char *cpuset_task_status_allowed(struct task_struct *task,
return buffer; return buffer;
} }
static inline void cpuset_lock(void) {}
static inline void cpuset_unlock(void) {}
#endif /* !CONFIG_CPUSETS */ #endif /* !CONFIG_CPUSETS */
#endif /* _LINUX_CPUSET_H */ #endif /* _LINUX_CPUSET_H */
...@@ -2149,6 +2149,33 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) ...@@ -2149,6 +2149,33 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
return allowed; return allowed;
} }
/**
 * cpuset_lock - lock out any changes to cpuset structures
 *
 * The out of memory (oom) code needs to lock down cpusets
 * from being changed while it scans the tasklist looking for a
 * task in an overlapping cpuset.  Expose callback_sem via this
 * cpuset_lock() routine, so the oom code can lock it, before
 * locking the task list.  The tasklist_lock is a spinlock, so
 * must be taken inside callback_sem.
 *
 * Sleeps until the semaphore is available; must therefore be
 * called from a context that may sleep (i.e. not while holding
 * any spinlock).  Pair every call with cpuset_unlock().
 */
void cpuset_lock(void)
{
down(&callback_sem);
}
/**
 * cpuset_unlock - release lock on cpuset changes
 *
 * Undo the lock taken in a previous cpuset_lock() call.
 * Must only be called by the task that currently holds
 * callback_sem via cpuset_lock().
 */
void cpuset_unlock(void)
{
up(&callback_sem);
}
/** /**
* cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors? * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors?
* @p: pointer to task_struct of some other task. * @p: pointer to task_struct of some other task.
...@@ -2158,7 +2185,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) ...@@ -2158,7 +2185,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
* determine if task @p's memory usage might impact the memory * determine if task @p's memory usage might impact the memory
* available to the current task. * available to the current task.
* *
* Acquires callback_sem - not suitable for calling from a fast path. * Call while holding callback_sem.
**/ **/
int cpuset_excl_nodes_overlap(const struct task_struct *p) int cpuset_excl_nodes_overlap(const struct task_struct *p)
...@@ -2166,8 +2193,6 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p) ...@@ -2166,8 +2193,6 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */ const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */
int overlap = 0; /* do cpusets overlap? */ int overlap = 0; /* do cpusets overlap? */
down(&callback_sem);
task_lock(current); task_lock(current);
if (current->flags & PF_EXITING) { if (current->flags & PF_EXITING) {
task_unlock(current); task_unlock(current);
...@@ -2186,8 +2211,6 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p) ...@@ -2186,8 +2211,6 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed); overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
done: done:
up(&callback_sem);
return overlap; return overlap;
} }
......
...@@ -274,6 +274,7 @@ void out_of_memory(gfp_t gfp_mask, int order) ...@@ -274,6 +274,7 @@ void out_of_memory(gfp_t gfp_mask, int order)
show_mem(); show_mem();
} }
cpuset_lock();
read_lock(&tasklist_lock); read_lock(&tasklist_lock);
retry: retry:
p = select_bad_process(); p = select_bad_process();
...@@ -284,6 +285,7 @@ void out_of_memory(gfp_t gfp_mask, int order) ...@@ -284,6 +285,7 @@ void out_of_memory(gfp_t gfp_mask, int order)
/* Found nothing?!?! Either we hang forever, or we panic. */ /* Found nothing?!?! Either we hang forever, or we panic. */
if (!p) { if (!p) {
read_unlock(&tasklist_lock); read_unlock(&tasklist_lock);
cpuset_unlock();
panic("Out of memory and no killable processes...\n"); panic("Out of memory and no killable processes...\n");
} }
...@@ -293,6 +295,7 @@ void out_of_memory(gfp_t gfp_mask, int order) ...@@ -293,6 +295,7 @@ void out_of_memory(gfp_t gfp_mask, int order)
out: out:
read_unlock(&tasklist_lock); read_unlock(&tasklist_lock);
cpuset_unlock();
if (mm) if (mm)
mmput(mm); mmput(mm);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment