Commit f412ac08 authored by Hugh Dickins's avatar Hugh Dickins Committed by Linus Torvalds

[PATCH] mm: fix rss and mmlist locking

A couple of oddities were guarded by page_table_lock, no longer properly
guarded when that is split.

The mm_counters of file_rss and anon_rss: make those an atomic_t, or an
atomic64_t if the architecture supports it, in such a case.  Definitions by
courtesy of Christoph Lameter: who spent considerable effort on more scalable
ways of counting, but found insufficient benefit in practice.

And adding an mm with swap to the mmlist for swapoff: the list is well-
guarded by its own lock, but the list_empty check now has to be repeated
inside it.
Signed-off-by: default avatarHugh Dickins <hugh@veritas.com>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent 4c21e2f2
...@@ -249,13 +249,47 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, ...@@ -249,13 +249,47 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
extern void arch_unmap_area(struct mm_struct *, unsigned long); extern void arch_unmap_area(struct mm_struct *, unsigned long);
extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
/*
* The mm counters are not protected by its page_table_lock,
* so must be incremented atomically.
*/
#ifdef ATOMIC64_INIT
#define set_mm_counter(mm, member, value) atomic64_set(&(mm)->_##member, value)
#define get_mm_counter(mm, member) ((unsigned long)atomic64_read(&(mm)->_##member))
#define add_mm_counter(mm, member, value) atomic64_add(value, &(mm)->_##member)
#define inc_mm_counter(mm, member) atomic64_inc(&(mm)->_##member)
#define dec_mm_counter(mm, member) atomic64_dec(&(mm)->_##member)
typedef atomic64_t mm_counter_t;
#else /* !ATOMIC64_INIT */
/*
* The counters wrap back to 0 at 2^32 * PAGE_SIZE,
* that is, at 16TB if using 4kB page size.
*/
#define set_mm_counter(mm, member, value) atomic_set(&(mm)->_##member, value)
#define get_mm_counter(mm, member) ((unsigned long)atomic_read(&(mm)->_##member))
#define add_mm_counter(mm, member, value) atomic_add(value, &(mm)->_##member)
#define inc_mm_counter(mm, member) atomic_inc(&(mm)->_##member)
#define dec_mm_counter(mm, member) atomic_dec(&(mm)->_##member)
typedef atomic_t mm_counter_t;
#endif /* !ATOMIC64_INIT */
#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
/*
* The mm counters are protected by its page_table_lock,
* so can be incremented directly.
*/
#define set_mm_counter(mm, member, value) (mm)->_##member = (value) #define set_mm_counter(mm, member, value) (mm)->_##member = (value)
#define get_mm_counter(mm, member) ((mm)->_##member) #define get_mm_counter(mm, member) ((mm)->_##member)
#define add_mm_counter(mm, member, value) (mm)->_##member += (value) #define add_mm_counter(mm, member, value) (mm)->_##member += (value)
#define inc_mm_counter(mm, member) (mm)->_##member++ #define inc_mm_counter(mm, member) (mm)->_##member++
#define dec_mm_counter(mm, member) (mm)->_##member-- #define dec_mm_counter(mm, member) (mm)->_##member--
#define get_mm_rss(mm) ((mm)->_file_rss + (mm)->_anon_rss) typedef unsigned long mm_counter_t;
#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
#define get_mm_rss(mm) \
(get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
#define update_hiwater_rss(mm) do { \ #define update_hiwater_rss(mm) do { \
unsigned long _rss = get_mm_rss(mm); \ unsigned long _rss = get_mm_rss(mm); \
if ((mm)->hiwater_rss < _rss) \ if ((mm)->hiwater_rss < _rss) \
...@@ -266,8 +300,6 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); ...@@ -266,8 +300,6 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
(mm)->hiwater_vm = (mm)->total_vm; \ (mm)->hiwater_vm = (mm)->total_vm; \
} while (0) } while (0)
typedef unsigned long mm_counter_t;
struct mm_struct { struct mm_struct {
struct vm_area_struct * mmap; /* list of VMAs */ struct vm_area_struct * mmap; /* list of VMAs */
struct rb_root mm_rb; struct rb_root mm_rb;
...@@ -291,7 +323,9 @@ struct mm_struct { ...@@ -291,7 +323,9 @@ struct mm_struct {
* by mmlist_lock * by mmlist_lock
*/ */
/* Special counters protected by the page_table_lock */ /* Special counters, in some configurations protected by the
* page_table_lock, in other configurations by being atomic.
*/
mm_counter_t _file_rss; mm_counter_t _file_rss;
mm_counter_t _anon_rss; mm_counter_t _anon_rss;
......
...@@ -372,7 +372,9 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, ...@@ -372,7 +372,9 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
/* make sure dst_mm is on swapoff's mmlist. */ /* make sure dst_mm is on swapoff's mmlist. */
if (unlikely(list_empty(&dst_mm->mmlist))) { if (unlikely(list_empty(&dst_mm->mmlist))) {
spin_lock(&mmlist_lock); spin_lock(&mmlist_lock);
list_add(&dst_mm->mmlist, &src_mm->mmlist); if (list_empty(&dst_mm->mmlist))
list_add(&dst_mm->mmlist,
&src_mm->mmlist);
spin_unlock(&mmlist_lock); spin_unlock(&mmlist_lock);
} }
} }
......
...@@ -559,7 +559,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma) ...@@ -559,7 +559,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
swap_duplicate(entry); swap_duplicate(entry);
if (list_empty(&mm->mmlist)) { if (list_empty(&mm->mmlist)) {
spin_lock(&mmlist_lock); spin_lock(&mmlist_lock);
list_add(&mm->mmlist, &init_mm.mmlist); if (list_empty(&mm->mmlist))
list_add(&mm->mmlist, &init_mm.mmlist);
spin_unlock(&mmlist_lock); spin_unlock(&mmlist_lock);
} }
set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment