Commit 0e4e73f8, authored by Brent Casavant, committed by Linus Torvalds

[PATCH] TCP hashes: NUMA interleaving

Modifies the TCP ehash and TCP bhash to enable the use of vmalloc to
alleviate boottime memory allocation imbalances on NUMA systems, utilizing
flags to the alloc_large_system_hash routine in order to centralize the
enabling of this behavior.
Signed-off-by: Brent Casavant <bcasavan@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent e330572f
...@@ -256,6 +256,7 @@ ...@@ -256,6 +256,7 @@
#include <linux/smp_lock.h> #include <linux/smp_lock.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/random.h> #include <linux/random.h>
#include <linux/bootmem.h>
#include <net/icmp.h> #include <net/icmp.h>
#include <net/tcp.h> #include <net/tcp.h>
...@@ -2254,7 +2255,6 @@ __setup("thash_entries=", set_thash_entries); ...@@ -2254,7 +2255,6 @@ __setup("thash_entries=", set_thash_entries);
void __init tcp_init(void) void __init tcp_init(void)
{ {
struct sk_buff *skb = NULL; struct sk_buff *skb = NULL;
unsigned long goal;
int order, i; int order, i;
if (sizeof(struct tcp_skb_cb) > sizeof(skb->cb)) if (sizeof(struct tcp_skb_cb) > sizeof(skb->cb))
...@@ -2287,43 +2287,35 @@ void __init tcp_init(void) ...@@ -2287,43 +2287,35 @@ void __init tcp_init(void)
* *
* The methodology is similar to that of the buffer cache. * The methodology is similar to that of the buffer cache.
*/ */
if (num_physpages >= (128 * 1024))
goal = num_physpages >> (21 - PAGE_SHIFT);
else
goal = num_physpages >> (23 - PAGE_SHIFT);
if (thash_entries)
goal = (thash_entries * sizeof(struct tcp_ehash_bucket)) >> PAGE_SHIFT;
for (order = 0; (1UL << order) < goal; order++)
;
do {
tcp_ehash_size = (1UL << order) * PAGE_SIZE /
sizeof(struct tcp_ehash_bucket);
tcp_ehash_size >>= 1;
while (tcp_ehash_size & (tcp_ehash_size - 1))
tcp_ehash_size--;
tcp_ehash = (struct tcp_ehash_bucket *) tcp_ehash = (struct tcp_ehash_bucket *)
__get_free_pages(GFP_ATOMIC, order); alloc_large_system_hash("TCP established",
} while (!tcp_ehash && --order > 0); sizeof(struct tcp_ehash_bucket),
thash_entries,
if (!tcp_ehash) (num_physpages >= 128 * 1024) ?
panic("Failed to allocate TCP established hash table\n"); (25 - PAGE_SHIFT) :
(27 - PAGE_SHIFT),
HASH_HIGHMEM,
&tcp_ehash_size,
NULL,
0);
tcp_ehash_size = (1 << tcp_ehash_size) >> 1;
for (i = 0; i < (tcp_ehash_size << 1); i++) { for (i = 0; i < (tcp_ehash_size << 1); i++) {
rwlock_init(&tcp_ehash[i].lock); rwlock_init(&tcp_ehash[i].lock);
INIT_HLIST_HEAD(&tcp_ehash[i].chain); INIT_HLIST_HEAD(&tcp_ehash[i].chain);
} }
do {
tcp_bhash_size = (1UL << order) * PAGE_SIZE /
sizeof(struct tcp_bind_hashbucket);
if ((tcp_bhash_size > (64 * 1024)) && order > 0)
continue;
tcp_bhash = (struct tcp_bind_hashbucket *) tcp_bhash = (struct tcp_bind_hashbucket *)
__get_free_pages(GFP_ATOMIC, order); alloc_large_system_hash("TCP bind",
} while (!tcp_bhash && --order >= 0); sizeof(struct tcp_bind_hashbucket),
tcp_ehash_size,
if (!tcp_bhash) (num_physpages >= 128 * 1024) ?
panic("Failed to allocate TCP bind hash table\n"); (25 - PAGE_SHIFT) :
(27 - PAGE_SHIFT),
HASH_HIGHMEM,
&tcp_bhash_size,
NULL,
64 * 1024);
tcp_bhash_size = 1 << tcp_bhash_size;
for (i = 0; i < tcp_bhash_size; i++) { for (i = 0; i < tcp_bhash_size; i++) {
spin_lock_init(&tcp_bhash[i].lock); spin_lock_init(&tcp_bhash[i].lock);
INIT_HLIST_HEAD(&tcp_bhash[i].chain); INIT_HLIST_HEAD(&tcp_bhash[i].chain);
...@@ -2332,6 +2324,10 @@ void __init tcp_init(void) ...@@ -2332,6 +2324,10 @@ void __init tcp_init(void)
/* Try to be a bit smarter and adjust defaults depending /* Try to be a bit smarter and adjust defaults depending
* on available memory. * on available memory.
*/ */
for (order = 0; ((1 << order) << PAGE_SHIFT) <
(tcp_bhash_size * sizeof(struct tcp_bind_hashbucket));
order++)
;
if (order > 4) { if (order > 4) {
sysctl_local_port_range[0] = 32768; sysctl_local_port_range[0] = 32768;
sysctl_local_port_range[1] = 61000; sysctl_local_port_range[1] = 61000;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment