Commit 29580832 authored by Andrew Morton's avatar Andrew Morton Committed by David S. Miller

[PATCH] speed up vm_enough_memory()

This function is called a lot.  Every brk().  The atomic_add() against a
global counter hurts on large SMP machines.

The patch simply reduces the rate at which that atomic operation is
performed, by accumulating a per-cpu count which is spilled into the global
counter when the local counter overflows.

It trades a little accuracy for efficiency.

I tried various implementations involving kmalloc_percpu() and open-coded
per-cpu arrays in a generic "per-cpu counter" thing.  They all were
surprisingly sucky - the additional cache misses involved in walking the more
complex data structures really showed up.
parent 061afd80
......@@ -134,8 +134,6 @@ static int uptime_read_proc(char *page, char **start, off_t off,
return proc_calc_metrics(page, start, off, count, eof, len);
}
extern atomic_t vm_committed_space;
static int meminfo_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
......
#ifndef _LINUX_MMAN_H
#define _LINUX_MMAN_H
#include <linux/config.h>
#include <asm/atomic.h>
#include <asm/mman.h>
#define MREMAP_MAYMOVE 1
#define MREMAP_FIXED 2
extern int vm_enough_memory(long pages);
extern void vm_unacct_memory(long pages);
extern atomic_t vm_committed_space;
#ifdef CONFIG_SMP
extern void vm_acct_memory(long pages);
#else
/*
 * UP (!CONFIG_SMP) fallback: with a single CPU there is no cross-CPU
 * cacheline ping-pong to avoid, so account straight into the global
 * counter.  'pages' may be negative to undo a previous accounting.
 */
static inline void vm_acct_memory(long pages)
{
	atomic_add(pages, &vm_committed_space);
}
#endif
/*
 * Release previously accounted committed address space.  Implemented as
 * negative accounting so the SMP per-cpu batching in vm_acct_memory()
 * is shared by both directions.
 *
 * NOTE(review): an 'extern void vm_unacct_memory(long pages);'
 * declaration also appears above in this header — presumably a removed
 * line of the original diff whose '-' marker was lost; confirm against
 * the real patch, as both cannot coexist.
 */
static inline void vm_unacct_memory(long pages)
{
	vm_acct_memory(-pages);
}
#endif /* _LINUX_MMAN_H */
......@@ -53,11 +53,6 @@ int sysctl_overcommit_memory = 0; /* default is heuristic overcommit */
int sysctl_overcommit_ratio = 50; /* default is 50% */
atomic_t vm_committed_space = ATOMIC_INIT(0);
/*
 * Undo a prior charge of 'pages' pages against the global
 * committed-space counter.  (This is the pre-patch mm/mmap.c version
 * that the commit removes in favour of the static inline in
 * <linux/mman.h>.)
 */
inline void vm_unacct_memory(long pages)
{
	atomic_sub(pages, &vm_committed_space);
}
/*
* Check that a process has enough memory to allocate a new virtual
* mapping. 1 means there is enough memory for the allocation to
......@@ -73,7 +68,7 @@ int vm_enough_memory(long pages)
{
unsigned long free, allowed;
atomic_add(pages, &vm_committed_space);
vm_acct_memory(pages);
/*
* Sometimes we want to use more memory than we have
......
......@@ -16,6 +16,7 @@
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/init.h>
......@@ -347,6 +348,32 @@ unsigned int pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
return pagevec_count(pvec);
}
#ifdef CONFIG_SMP
/*
* We tolerate a little inaccuracy to avoid ping-ponging the counter between
* CPUs
*/
#define ACCT_THRESHOLD max(16, NR_CPUS * 2)
static DEFINE_PER_CPU(long, committed_space) = 0;
/*
 * SMP commit-space accounting: accumulate into a per-cpu counter and
 * only fold into the global atomic once the local count drifts past
 * ACCT_THRESHOLD in either direction.  This trades a bounded amount of
 * inaccuracy in vm_committed_space for far fewer cross-CPU atomic
 * operations on the hot brk()/mmap() path.  'pages' may be negative.
 */
void vm_acct_memory(long pages)
{
	long *local;

	/* Pin this task to the CPU so the per-cpu pointer stays valid. */
	preempt_disable();
	local = &__get_cpu_var(committed_space);
	*local += pages;
	if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
		/* Spill the batched delta into the global counter. */
		atomic_add(*local, &vm_committed_space);
		*local = 0;
	}
	preempt_enable();
}
#endif
/*
* Perform any setup for the swap system
*/
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment