Commit 026ace06 authored by Julian Anastasov, committed by Pablo Neira Ayuso

ipvs: optimize dst usage for real server

Currently, when forwarding requests to real servers,
we take dst_lock and use atomic operations to clone the
dst_cache value. As the dst_cache value does not change
most of the time, it is better to use RCU and to take
dst_lock only when the obsolete dst has to be replaced.
For this to work, keep dst_cache in a new structure
protected by RCU. Packets to remote real servers use the
noref version of dst_cache; it remains valid while we are
in an RCU read-side critical section, because dst_release
for replaced dsts is now invoked only after the grace
period. Packets to local real servers that are passed to
the local stack with NF_ACCEPT still need a dst clone.
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
parent 4115ded1
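For reference, a minimal sketch of the read side described in the commit message (the helper names ip_vs_dst_check_sketch/ip_vs_attach_dst_sketch and the local flag are illustrative, not part of this commit; the real transmit-path changes are in the part of the diff that is collapsed here):

/* Minimal sketch of the RCU read side (illustrative names, caller holds
 * rcu_read_lock()): reuse the cached dst while it is still valid.
 */
static struct dst_entry *ip_vs_dst_check_sketch(struct ip_vs_dest *dest)
{
	struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst);
	struct dst_entry *dst;

	if (!dest_dst)
		return NULL;		/* miss: rebuild under dst_lock */
	dst = dest_dst->dst_cache;
	if (dst->obsolete &&
	    dst->ops->check(dst, dest_dst->dst_cookie) == NULL)
		return NULL;		/* obsoleted: rebuild under dst_lock */
	return dst;
}

/* Attach the cached dst to the skb: noref for remote real servers,
 * a real reference when the packet leaves the RCU section via NF_ACCEPT.
 */
static void ip_vs_attach_dst_sketch(struct sk_buff *skb,
				    struct dst_entry *dst, bool local)
{
	if (local) {
		dst_hold(dst);
		skb_dst_set(skb, dst);
	} else {
		/* valid until rcu_read_unlock(): replaced dsts are only
		 * released after a grace period
		 */
		skb_dst_set_noref(skb, dst);
	}
}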
@@ -724,6 +724,13 @@ struct ip_vs_service {
 	struct ip_vs_pe		*pe;
 };
 
+/* Information for cached dst */
+struct ip_vs_dest_dst {
+	struct dst_entry	*dst_cache;	/* destination cache entry */
+	u32			dst_cookie;
+	union nf_inet_addr	dst_saddr;
+	struct rcu_head		rcu_head;
+};
 
 /*
  * The real server destination forwarding entry
@@ -752,9 +759,7 @@ struct ip_vs_dest {
 
 	/* for destination cache */
 	spinlock_t		dst_lock;	/* lock of dst_cache */
-	struct dst_entry	*dst_cache;	/* destination cache entry */
-	u32			dst_cookie;
-	union nf_inet_addr	dst_saddr;
+	struct ip_vs_dest_dst __rcu *dest_dst;	/* cached dst info */
 
 	/* for virtual service */
 	struct ip_vs_service	*svc;		/* service it belongs to */
@@ -1427,6 +1432,7 @@ extern int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 			   struct ip_vs_protocol *pp, int offset,
 			   unsigned int hooknum, struct ip_vs_iphdr *iph);
+extern void ip_vs_dest_dst_rcu_free(struct rcu_head *head);
 
 #ifdef CONFIG_IP_VS_IPV6
 extern int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
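A hedged sketch of the intended writer side for the new ip_vs_dest_dst above (the helper name is hypothetical; in the commit the cache is refilled from the route lookup code): a fresh entry is published with rcu_assign_pointer() under dst_lock, and the previous one is freed through ip_vs_dest_dst_rcu_free() only after a grace period.

static int ip_vs_dest_dst_update_sketch(struct ip_vs_dest *dest,
					struct dst_entry *dst, u32 cookie,
					const union nf_inet_addr *saddr)
{
	struct ip_vs_dest_dst *new, *old;

	new = kmalloc(sizeof(*new), GFP_ATOMIC);
	if (!new)
		return -ENOMEM;
	new->dst_cache = dst;		/* takes over the route reference */
	new->dst_cookie = cookie;
	new->dst_saddr = *saddr;

	spin_lock_bh(&dest->dst_lock);	/* dst_lock now serializes writers only */
	old = rcu_dereference_protected(dest->dest_dst,
					lockdep_is_held(&dest->dst_lock));
	rcu_assign_pointer(dest->dest_dst, new);
	spin_unlock_bh(&dest->dst_lock);

	/* readers may still use the old entry; dst_release() is deferred */
	if (old)
		call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
	return 0;
}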
@@ -1395,10 +1395,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 				goto ignore_ipip;
 			/* Prefer the resulting PMTU */
 			if (dest) {
-				spin_lock(&dest->dst_lock);
-				if (dest->dst_cache)
-					mtu = dst_mtu(dest->dst_cache);
-				spin_unlock(&dest->dst_lock);
+				struct ip_vs_dest_dst *dest_dst;
+
+				rcu_read_lock();
+				dest_dst = rcu_dereference(dest->dest_dst);
+				if (dest_dst)
+					mtu = dst_mtu(dest_dst->dst_cache);
+				rcu_read_unlock();
 			}
 			if (mtu > 68 + sizeof(struct iphdr))
 				mtu -= sizeof(struct iphdr);
@@ -641,15 +641,26 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
 	return dest;
 }
 
-/* Release dst_cache for dest in user context */
+void ip_vs_dest_dst_rcu_free(struct rcu_head *head)
+{
+	struct ip_vs_dest_dst *dest_dst = container_of(head,
+						       struct ip_vs_dest_dst,
+						       rcu_head);
+
+	dst_release(dest_dst->dst_cache);
+	kfree(dest_dst);
+}
+
+/* Release dest_dst and dst_cache for dest in user context */
 static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
 {
-	struct dst_entry *old_dst;
+	struct ip_vs_dest_dst *old;
 
-	old_dst = dest->dst_cache;
-	dest->dst_cache = NULL;
-	dst_release(old_dst);
-	dest->dst_saddr.ip = 0;
+	old = rcu_dereference_protected(dest->dest_dst, 1);
+	if (old) {
+		RCU_INIT_POINTER(dest->dest_dst, NULL);
+		call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
+	}
 }
 
 /*
@@ -1513,7 +1524,7 @@ static inline void
 ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
 {
 	spin_lock_bh(&dest->dst_lock);
-	if (dest->dst_cache && dest->dst_cache->dev == dev) {
+	if (dest->dest_dst && dest->dest_dst->dst_cache->dev == dev) {
 		IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
 			      dev->name,
 			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
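__ip_vs_dst_cache_reset() is expected to be called with dst_lock held, as in the ip_vs_forget_dev() path above. A short sketch of that writer-side usage (the rcu_dereference_protected() form here is illustrative), for whenever a destination's cached route has to be dropped:

	struct ip_vs_dest_dst *dest_dst;

	/* writer side: dst_lock serializes updates, readers stay lockless */
	spin_lock_bh(&dest->dst_lock);
	dest_dst = rcu_dereference_protected(dest->dest_dst,
					     lockdep_is_held(&dest->dst_lock));
	if (dest_dst && dest_dst->dst_cache->dev == dev)
		__ip_vs_dst_cache_reset(dest);	/* call_rcu() defers dst_release() */
	spin_unlock_bh(&dest->dst_lock);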
This diff is collapsed.