Commit c862855d authored by David S. Miller's avatar David S. Miller

[IPV4]: Make fib_semantics algorithms scale better.

A singly linked list was previously used to
do fib_info object lookup for various actions
in the routing code.  This does not scale very
well with many devices and even a moderate number
of routes.  This was noted by Benjamin Lahaise.

To fix all of this we use 3 hash tables, two of
which grow dynamically as the number fib_info
objects increases while the final one is fixes in
size.

The statically sized table hashes on device index.
This is used for fib_sync_down, fib_sync_up, and
ip_fib_check_default.

The first dynamically sized table is keyed on
protocol, prefsrc, and priority.  This is used
by fib_create_info() to look for existing fib_info
objects matching the new one being constructed.

The last dynamically sized table is keyed on
the preferred source of the route if it has one
specified.  This is used by fib_sync_down when
a local address disappears.

There are still some scalability problems for
Bens test case in fib_hash.c and I will try to
attack those next.
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 0bdc860c
......@@ -23,8 +23,7 @@
/* WARNING: The ordering of these elements must match ordering
* of RTA_* rtnetlink attribute numbers.
*/
struct kern_rta
{
struct kern_rta {
void *rta_dst;
void *rta_src;
int *rta_iif;
......@@ -40,9 +39,12 @@ struct kern_rta
struct rta_session *rta_sess;
};
struct fib_nh
{
struct fib_info;
struct fib_nh {
struct net_device *nh_dev;
struct hlist_node nh_hash;
struct fib_info *nh_parent;
unsigned nh_flags;
unsigned char nh_scope;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
......@@ -60,9 +62,9 @@ struct fib_nh
* This structure contains data shared by many of routes.
*/
struct fib_info
{
struct list_head fib_list;
struct fib_info {
struct hlist_node fib_hash;
struct hlist_node fib_lhash;
int fib_treeref;
atomic_t fib_clntref;
int fib_dead;
......@@ -88,8 +90,7 @@ struct fib_info
struct fib_rule;
#endif
struct fib_result
{
struct fib_result {
unsigned char prefixlen;
unsigned char nh_sel;
unsigned char type;
......@@ -118,8 +119,7 @@ struct fib_result
#define FIB_RES_DEV(res) (FIB_RES_NH(res).nh_dev)
#define FIB_RES_OIF(res) (FIB_RES_NH(res).nh_oif)
struct fib_table
{
struct fib_table {
unsigned char tb_id;
unsigned tb_stamp;
int (*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res);
......
......@@ -45,14 +45,14 @@
#define FSprintk(a...)
static struct list_head fib_info_list = LIST_HEAD_INIT(fib_info_list);
static rwlock_t fib_info_lock = RW_LOCK_UNLOCKED;
int fib_info_cnt;
static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_hash_size;
static unsigned int fib_info_cnt;
#define for_fib_info() { struct fib_info *fi; \
list_for_each_entry(fi, &fib_info_list, fib_list)
#define endfor_fib_info() }
#define DEVINDEX_HASHBITS 8
static struct hlist_head fib_info_devhash[DEVINDEX_HASHBITS];
#ifdef CONFIG_IP_ROUTE_MULTIPATH
......@@ -156,7 +156,12 @@ void fib_release_info(struct fib_info *fi)
{
write_lock(&fib_info_lock);
if (fi && --fi->fib_treeref == 0) {
list_del(&fi->fib_list);
hlist_del(&fi->fib_hash);
if (fi->fib_prefsrc)
hlist_del(&fi->fib_lhash);
change_nexthops(fi) {
hlist_del(&nh->nh_hash);
} endfor_nexthops(fi)
fi->fib_dead = 1;
fib_info_put(fi);
}
......@@ -184,42 +189,79 @@ static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *
return 0;
}
static __inline__ struct fib_info * fib_find_info(const struct fib_info *nfi)
static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
{
for_fib_info() {
unsigned int mask = (fib_hash_size - 1);
unsigned int val = fi->fib_nhs;
val ^= fi->fib_protocol;
val ^= fi->fib_prefsrc;
val ^= fi->fib_priority;
return (val ^ (val >> 7) ^ (val >> 12)) & mask;
}
static struct fib_info *fib_find_info(const struct fib_info *nfi)
{
struct hlist_head *head;
struct hlist_node *node;
struct fib_info *fi;
unsigned int hash;
hash = fib_info_hashfn(nfi);
head = &fib_info_hash[hash];
hlist_for_each_entry(fi, node, head, fib_hash) {
if (fi->fib_nhs != nfi->fib_nhs)
continue;
if (nfi->fib_protocol == fi->fib_protocol &&
nfi->fib_prefsrc == fi->fib_prefsrc &&
nfi->fib_priority == fi->fib_priority &&
memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 &&
memcmp(nfi->fib_metrics, fi->fib_metrics,
sizeof(fi->fib_metrics)) == 0 &&
((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
(nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
return fi;
} endfor_fib_info();
}
return NULL;
}
static inline unsigned int fib_devindex_hashfn(unsigned int val)
{
unsigned int mask = ((1U << DEVINDEX_HASHBITS) - 1);
return (val ^
(val >> DEVINDEX_HASHBITS) ^
(val >> (DEVINDEX_HASHBITS * 2))) & mask;
}
/* Check, that the gateway is already configured.
Used only by redirect accept routine.
*/
int ip_fib_check_default(u32 gw, struct net_device *dev)
{
struct hlist_head *head;
struct hlist_node *node;
struct fib_nh *nh;
unsigned int hash;
read_lock(&fib_info_lock);
for_fib_info() {
if (fi->fib_flags & RTNH_F_DEAD)
continue;
for_nexthops(fi) {
if (nh->nh_dev == dev && nh->nh_gw == gw &&
nh->nh_scope == RT_SCOPE_LINK &&
hash = fib_devindex_hashfn(dev->ifindex);
head = &fib_info_devhash[hash];
hlist_for_each_entry(nh, node, head, nh_hash) {
if (nh->nh_dev == dev &&
nh->nh_gw == gw &&
!(nh->nh_flags&RTNH_F_DEAD)) {
read_unlock(&fib_info_lock);
return 0;
}
} endfor_nexthops(fi);
} endfor_fib_info();
}
read_unlock(&fib_info_lock);
return -1;
}
......@@ -446,6 +488,82 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n
return 0;
}
static inline unsigned int fib_laddr_hashfn(u32 val)
{
unsigned int mask = (fib_hash_size - 1);
return (val ^ (val >> 7) ^ (val >> 14)) & mask;
}
static struct hlist_head *fib_hash_alloc(int bytes)
{
if (bytes <= PAGE_SIZE)
return kmalloc(bytes, GFP_KERNEL);
else
return (struct hlist_head *)
__get_free_pages(GFP_KERNEL, get_order(bytes));
}
static void fib_hash_free(struct hlist_head *hash, int bytes)
{
if (!hash)
return;
if (bytes <= PAGE_SIZE)
kfree(hash);
else
free_pages((unsigned long) hash, get_order(bytes));
}
static void fib_hash_move(struct hlist_head *new_info_hash,
struct hlist_head *new_laddrhash,
unsigned int new_size)
{
unsigned int old_size = fib_hash_size;
unsigned int i;
write_lock(&fib_info_lock);
fib_hash_size = new_size;
for (i = 0; i < old_size; i++) {
struct hlist_head *head = &fib_info_hash[i];
struct hlist_node *node;
struct fib_info *fi;
hlist_for_each_entry(fi, node, head, fib_hash) {
struct hlist_head *dest;
unsigned int new_hash;
hlist_del(&fi->fib_hash);
new_hash = fib_info_hashfn(fi);
dest = &new_info_hash[new_hash];
hlist_add_head(&fi->fib_hash, dest);
}
}
fib_info_hash = new_info_hash;
for (i = 0; i < old_size; i++) {
struct hlist_head *lhead = &fib_info_laddrhash[i];
struct hlist_node *node;
struct fib_info *fi;
hlist_for_each_entry(fi, node, lhead, fib_lhash) {
struct hlist_head *ldest;
unsigned int new_hash;
hlist_del(&fi->fib_lhash);
new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
ldest = &new_laddrhash[new_hash];
hlist_add_head(&fi->fib_lhash, ldest);
}
}
fib_info_laddrhash = new_laddrhash;
write_unlock(&fib_info_lock);
}
struct fib_info *
fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
const struct nlmsghdr *nlh, int *errp)
......@@ -471,15 +589,41 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
}
#endif
fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
err = -ENOBUFS;
if (fib_info_cnt >= fib_hash_size) {
unsigned int new_size = fib_hash_size << 1;
struct hlist_head *new_info_hash;
struct hlist_head *new_laddrhash;
unsigned int bytes;
if (!new_size)
new_size = 1;
bytes = new_size * sizeof(struct hlist_head *);
new_info_hash = fib_hash_alloc(bytes);
new_laddrhash = fib_hash_alloc(bytes);
if (!new_info_hash || !new_laddrhash) {
fib_hash_free(new_info_hash, bytes);
fib_hash_free(new_laddrhash, bytes);
} else
fib_hash_move(new_info_hash, new_laddrhash, new_size);
if (!fib_hash_size)
goto failure;
}
fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
if (fi == NULL)
goto failure;
fib_info_cnt++;
memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
fi->fib_protocol = r->rtm_protocol;
fi->fib_nhs = nhs;
change_nexthops(fi) {
nh->nh_parent = fi;
} endfor_nexthops(fi)
fi->fib_flags = r->rtm_flags;
if (rta->rta_priority)
fi->fib_priority = *rta->rta_priority;
......@@ -576,7 +720,24 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
fi->fib_treeref++;
atomic_inc(&fi->fib_clntref);
write_lock(&fib_info_lock);
list_add(&fi->fib_list, &fib_info_list);
hlist_add_head(&fi->fib_hash,
&fib_info_hash[fib_info_hashfn(fi)]);
if (fi->fib_prefsrc) {
struct hlist_head *head;
head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
hlist_add_head(&fi->fib_lhash, head);
}
change_nexthops(fi) {
struct hlist_head *head;
unsigned int hash;
if (!nh->nh_dev)
continue;
hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
head = &fib_info_devhash[hash];
hlist_add_head(&nh->nh_hash, head);
} endfor_nexthops(fi)
write_unlock(&fib_info_lock);
return fi;
......@@ -875,13 +1036,38 @@ int fib_sync_down(u32 local, struct net_device *dev, int force)
if (force)
scope = -1;
for_fib_info() {
if (local && fi->fib_prefsrc == local) {
BUG_ON(!fib_info_laddrhash);
if (local) {
unsigned int hash = fib_laddr_hashfn(local);
struct hlist_head *head = &fib_info_laddrhash[hash];
struct hlist_node *node;
struct fib_info *fi;
hlist_for_each_entry(fi, node, head, fib_lhash) {
if (fi->fib_prefsrc == local) {
fi->fib_flags |= RTNH_F_DEAD;
ret++;
} else if (dev && fi->fib_nhs) {
int dead = 0;
}
}
}
if (dev) {
struct fib_info *prev_fi = NULL;
unsigned int hash = fib_devindex_hashfn(dev->ifindex);
struct hlist_head *head = &fib_info_devhash[hash];
struct hlist_node *node;
struct fib_nh *nh;
hlist_for_each_entry(nh, node, head, nh_hash) {
struct fib_info *fi = nh->nh_parent;
int dead;
BUG_ON(!fi->fib_nhs);
if (nh->nh_dev != dev || fi == prev_fi)
continue;
prev_fi = fi;
dead = 0;
change_nexthops(fi) {
if (nh->nh_flags&RTNH_F_DEAD)
dead++;
......@@ -908,7 +1094,8 @@ int fib_sync_down(u32 local, struct net_device *dev, int force)
ret++;
}
}
} endfor_fib_info();
}
return ret;
}
......@@ -921,14 +1108,31 @@ int fib_sync_down(u32 local, struct net_device *dev, int force)
int fib_sync_up(struct net_device *dev)
{
int ret = 0;
struct fib_info *prev_fi;
unsigned int hash;
struct hlist_head *head;
struct hlist_node *node;
struct fib_nh *nh;
int ret;
if (!(dev->flags&IFF_UP))
return 0;
for_fib_info() {
int alive = 0;
prev_fi = NULL;
hash = fib_devindex_hashfn(dev->ifindex);
head = &fib_info_devhash[hash];
ret = 0;
hlist_for_each_entry(nh, node, head, nh_hash) {
struct fib_info *fi = nh->nh_parent;
int alive;
BUG_ON(!fi->fib_nhs);
if (nh->nh_dev != dev || fi == prev_fi)
continue;
prev_fi = fi;
alive = 0;
change_nexthops(fi) {
if (!(nh->nh_flags&RTNH_F_DEAD)) {
alive++;
......@@ -949,7 +1153,8 @@ int fib_sync_up(struct net_device *dev)
fi->fib_flags &= ~RTNH_F_DEAD;
ret++;
}
} endfor_fib_info();
}
return ret;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment