Commit c1a34035 authored by David S. Miller

Merge branch 'ipv6_route_sharing'

Martin KaFai Lau says:

====================
ipv6: Only create RTF_CACHE route after encountering pmtu exception

v4 -> v5:
- Patch 1 is new. Clean up the ipv6_select_ident() and ip6_fragment().

- Further simplify the newly added rt6_get_pcpu_route().  If there is a
  'prev' after cmpxchg, return prev instead of the newly created percpu
  clone.

v3 -> v4:
- Patch 8 is new. It keeps track of the DST_NOCACHE routes in a list to handle
  the iface down/unregister event.

- Remove rcu from the newly added rt6i_pcpu variable.  It is not needed
  because it has already been protected by the existing reader/writer lock.

- Thanks to 'Julian Anastasov <ja@ssi.bg>' for testing the FLOWI_FLAG_KNOWN_NH
  patches.

v2 -> v3:
- Patches 5 to 7 are new.  They take care of cases where the daddr in
  skb is not the one used to do the route look-up.  There are also
  related changes to rt6_nexthop() since v2, which are in patch 2/9.
  Thanks to 'Julian Anastasov <ja@ssi.bg>' for pointing it out.

- Fix a few problems in __ip6_rt_update_pmtu(): set the expiry and mtu
  before inserting into the tree, and do not call dst_destroy() after a
  tree insertion failure.  Also update the rt6i_pmtu in fib6_add_rt2node().
  Thanks to 'Steffen Klassert <steffen.klassert@secunet.com>' for pointing
  these out.

- Merge ip6_pmtu_rt_cache_alloc() into ip6_rt_cache_alloc().

v1 -> v2:
- Move the /128 route bug fixes to another series (accepted).
- Create a function for checking (rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)).
- Avoid shuffling the skb network_header.  Instead, change the function
  signature to take iph instead of skb.

- Many thanks to 'Hannes Frederic Sowa <hannes@stressinduktion.org>' for
  reviewing v1 and v2 and giving advice.

--Martin

~~~ start: v1 compose message (with the out-dated parts removed) ~~~

This series is to avoid creating a RTF_CACHE route whenever we are consulting
the fib6 tree with a new destination.  Instead, only create RTF_CACHE route
when we see a pmtu exception.

Out of all ipv6 RTF_CACHE routes that are created, the percentage that has a
different mtu is very small. In one of our end-user facing proxy servers,
only 1k out of 80k RTF_CACHE routes have a smaller MTU.  For our DC
traffic, there is no mtu exception.

A large fib6 tree causes problems: for example, 'ip -6 r show' takes a long
time and gc may kick in too often.  Also, when a service has restarted and a
lot of new TCP conn requests come in, it creates pressure on the tree by
inserting a lot of RTF_CACHE routes in a short time, and doing that currently
requires a write lock.

The first few patches are prep work to remove the assumption that the
returned rt is always RTF_CACHE.

The patch 'ipv6: Only create RTF_CACHE routes after encountering pmtu exception'
does the lazy RTF_CACHE route creation.

The following patches add percpu rt to compensate for the performance loss
after doing the RTF_CACHE lazy creation.

Here are some numbers from the udpflood test.  The udpflood has been
slightly modified to have a time limit instead of a count limit.

A /64 via gateway route is used for the test. Each udpflood uses 10000 dst
addresses.  The dst addresses of different udpflood processes do not overlap
with each other.

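(The column headings are missing from this message; presumably the columns
are: number of udpflood processes, result before this series, result after
this series.)
1                    16M                          15M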
10                   61M                          61M
20                   65M                          62M
40                   88M                          83M

~~~ end: v1 compose message ~~~
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 01b69614 d52d3997
...@@ -728,7 +728,7 @@ static struct cxgbi_sock *cxgbi_check_route6(struct sockaddr *dst_addr) ...@@ -728,7 +728,7 @@ static struct cxgbi_sock *cxgbi_check_route6(struct sockaddr *dst_addr)
} }
ndev = n->dev; ndev = n->dev;
if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { if (ipv6_addr_is_multicast(&daddr6->sin6_addr)) {
pr_info("multi-cast route %pI6 port %u, dev %s.\n", pr_info("multi-cast route %pI6 port %u, dev %s.\n",
daddr6->sin6_addr.s6_addr, daddr6->sin6_addr.s6_addr,
ntohs(daddr6->sin6_port), ndev->name); ntohs(daddr6->sin6_port), ndev->name);
......
...@@ -120,7 +120,11 @@ struct rt6_info { ...@@ -120,7 +120,11 @@ struct rt6_info {
struct rt6key rt6i_src; struct rt6key rt6i_src;
struct rt6key rt6i_prefsrc; struct rt6key rt6i_prefsrc;
struct list_head rt6i_uncached;
struct uncached_list *rt6i_uncached_list;
struct inet6_dev *rt6i_idev; struct inet6_dev *rt6i_idev;
struct rt6_info * __percpu *rt6i_pcpu;
u32 rt6i_metric; u32 rt6i_metric;
u32 rt6i_pmtu; u32 rt6i_pmtu;
...@@ -159,6 +163,14 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) ...@@ -159,6 +163,14 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
rt0->rt6i_flags |= RTF_EXPIRES; rt0->rt6i_flags |= RTF_EXPIRES;
} }
static inline u32 rt6_get_cookie(const struct rt6_info *rt)
{
if (rt->rt6i_flags & RTF_PCPU || unlikely(rt->dst.flags & DST_NOCACHE))
rt = (struct rt6_info *)(rt->dst.from);
return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
}
static inline void ip6_rt_put(struct rt6_info *rt) static inline void ip6_rt_put(struct rt6_info *rt)
{ {
/* dst_release() accepts a NULL parameter. /* dst_release() accepts a NULL parameter.
......
...@@ -145,7 +145,7 @@ static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst, ...@@ -145,7 +145,7 @@ static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst,
#ifdef CONFIG_IPV6_SUBTREES #ifdef CONFIG_IPV6_SUBTREES
np->saddr_cache = saddr; np->saddr_cache = saddr;
#endif #endif
np->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; np->dst_cookie = rt6_get_cookie(rt);
} }
static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
...@@ -163,11 +163,14 @@ static inline bool ipv6_unicast_destination(const struct sk_buff *skb) ...@@ -163,11 +163,14 @@ static inline bool ipv6_unicast_destination(const struct sk_buff *skb)
return rt->rt6i_flags & RTF_LOCAL; return rt->rt6i_flags & RTF_LOCAL;
} }
static inline bool ipv6_anycast_destination(const struct sk_buff *skb) static inline bool ipv6_anycast_destination(const struct dst_entry *dst,
const struct in6_addr *daddr)
{ {
struct rt6_info *rt = (struct rt6_info *) skb_dst(skb); struct rt6_info *rt = (struct rt6_info *)dst;
return rt->rt6i_flags & RTF_ANYCAST; return rt->rt6i_flags & RTF_ANYCAST ||
(rt->rt6i_dst.plen != 128 &&
ipv6_addr_equal(&rt->rt6i_dst.addr, daddr));
} }
int ip6_fragment(struct sock *sk, struct sk_buff *skb, int ip6_fragment(struct sock *sk, struct sk_buff *skb,
...@@ -194,9 +197,15 @@ static inline bool ip6_sk_ignore_df(const struct sock *sk) ...@@ -194,9 +197,15 @@ static inline bool ip6_sk_ignore_df(const struct sock *sk)
inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT; inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT;
} }
static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt) static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
struct in6_addr *daddr)
{ {
if (rt->rt6i_flags & RTF_GATEWAY)
return &rt->rt6i_gateway; return &rt->rt6i_gateway;
else if (unlikely(rt->rt6i_flags & RTF_CACHE))
return &rt->rt6i_dst.addr;
else
return daddr;
} }
#endif #endif
...@@ -671,8 +671,9 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add ...@@ -671,8 +671,9 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add
return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr));
} }
void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr, u32 ipv6_select_ident(struct net *net,
struct rt6_info *rt); const struct in6_addr *daddr,
const struct in6_addr *saddr);
void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb); void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb);
int ip6_dst_hoplimit(struct dst_entry *dst); int ip6_dst_hoplimit(struct dst_entry *dst);
......
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#define RTF_PREF(pref) ((pref) << 27) #define RTF_PREF(pref) ((pref) << 27)
#define RTF_PREF_MASK 0x18000000 #define RTF_PREF_MASK 0x18000000
#define RTF_PCPU 0x40000000
#define RTF_LOCAL 0x80000000 #define RTF_LOCAL 0x80000000
......
...@@ -192,7 +192,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_dev *dev, ...@@ -192,7 +192,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_dev *dev,
if (ipv6_addr_any(nexthop)) if (ipv6_addr_any(nexthop))
return NULL; return NULL;
} else { } else {
nexthop = rt6_nexthop(rt); nexthop = rt6_nexthop(rt, daddr);
/* We need to remember the address because it is needed /* We need to remember the address because it is needed
* by bt_xmit() when sending the packet. In bt_xmit(), the * by bt_xmit() when sending the packet. In bt_xmit(), the
......
...@@ -207,7 +207,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, ...@@ -207,7 +207,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
struct inet_peer *peer; struct inet_peer *peer;
peer = inet_getpeer_v6(net->ipv6.peers, peer = inet_getpeer_v6(net->ipv6.peers,
&rt->rt6i_dst.addr, 1); &fl6->daddr, 1);
res = inet_peer_xrlim_allow(peer, tmo); res = inet_peer_xrlim_allow(peer, tmo);
if (peer) if (peer)
inet_putpeer(peer); inet_putpeer(peer);
...@@ -337,7 +337,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, ...@@ -337,7 +337,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net,
* We won't send icmp if the destination is known * We won't send icmp if the destination is known
* anycast. * anycast.
*/ */
if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) { if (ipv6_anycast_destination(dst, &fl6->daddr)) {
net_dbg_ratelimited("icmp6_send: acast source\n"); net_dbg_ratelimited("icmp6_send: acast source\n");
dst_release(dst); dst_release(dst);
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
...@@ -564,7 +564,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ...@@ -564,7 +564,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
if (!ipv6_unicast_destination(skb) && if (!ipv6_unicast_destination(skb) &&
!(net->ipv6.sysctl.anycast_src_echo_reply && !(net->ipv6.sysctl.anycast_src_echo_reply &&
ipv6_anycast_destination(skb))) ipv6_anycast_destination(skb_dst(skb), saddr)))
saddr = NULL; saddr = NULL;
memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
......
...@@ -154,10 +154,32 @@ static void node_free(struct fib6_node *fn) ...@@ -154,10 +154,32 @@ static void node_free(struct fib6_node *fn)
kmem_cache_free(fib6_node_kmem, fn); kmem_cache_free(fib6_node_kmem, fn);
} }
static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
{
int cpu;
if (!non_pcpu_rt->rt6i_pcpu)
return;
for_each_possible_cpu(cpu) {
struct rt6_info **ppcpu_rt;
struct rt6_info *pcpu_rt;
ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu);
pcpu_rt = *ppcpu_rt;
if (pcpu_rt) {
dst_free(&pcpu_rt->dst);
*ppcpu_rt = NULL;
}
}
}
static void rt6_release(struct rt6_info *rt) static void rt6_release(struct rt6_info *rt)
{ {
if (atomic_dec_and_test(&rt->rt6i_ref)) if (atomic_dec_and_test(&rt->rt6i_ref)) {
rt6_free_pcpu(rt);
dst_free(&rt->dst); dst_free(&rt->dst);
}
} }
static void fib6_link_table(struct net *net, struct fib6_table *tb) static void fib6_link_table(struct net *net, struct fib6_table *tb)
...@@ -738,6 +760,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, ...@@ -738,6 +760,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
rt6_clean_expires(iter); rt6_clean_expires(iter);
else else
rt6_set_expires(iter, rt->dst.expires); rt6_set_expires(iter, rt->dst.expires);
iter->rt6i_pmtu = rt->rt6i_pmtu;
return -EEXIST; return -EEXIST;
} }
/* If we have the same destination and the same metric, /* If we have the same destination and the same metric,
......
...@@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) ...@@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
} }
rcu_read_lock_bh(); rcu_read_lock_bh();
nexthop = rt6_nexthop((struct rt6_info *)dst); nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
if (unlikely(!neigh)) if (unlikely(!neigh))
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
...@@ -459,7 +459,7 @@ int ip6_forward(struct sk_buff *skb) ...@@ -459,7 +459,7 @@ int ip6_forward(struct sk_buff *skb)
else else
target = &hdr->daddr; target = &hdr->daddr;
peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
/* Limit redirects both by destination (here) /* Limit redirects both by destination (here)
and by source (inside ndisc_send_redirect) and by source (inside ndisc_send_redirect)
...@@ -551,7 +551,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, ...@@ -551,7 +551,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
struct frag_hdr *fh; struct frag_hdr *fh;
unsigned int mtu, hlen, left, len; unsigned int mtu, hlen, left, len;
int hroom, troom; int hroom, troom;
__be32 frag_id = 0; __be32 frag_id;
int ptr, offset = 0, err = 0; int ptr, offset = 0, err = 0;
u8 *prevhdr, nexthdr = 0; u8 *prevhdr, nexthdr = 0;
struct net *net = dev_net(skb_dst(skb)->dev); struct net *net = dev_net(skb_dst(skb)->dev);
...@@ -584,6 +584,9 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, ...@@ -584,6 +584,9 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
} }
mtu -= hlen + sizeof(struct frag_hdr); mtu -= hlen + sizeof(struct frag_hdr);
frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr);
if (skb_has_frag_list(skb)) { if (skb_has_frag_list(skb)) {
int first_len = skb_pagelen(skb); int first_len = skb_pagelen(skb);
struct sk_buff *frag2; struct sk_buff *frag2;
...@@ -632,11 +635,10 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, ...@@ -632,11 +635,10 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
skb_reset_network_header(skb); skb_reset_network_header(skb);
memcpy(skb_network_header(skb), tmp_hdr, hlen); memcpy(skb_network_header(skb), tmp_hdr, hlen);
ipv6_select_ident(net, fh, rt);
fh->nexthdr = nexthdr; fh->nexthdr = nexthdr;
fh->reserved = 0; fh->reserved = 0;
fh->frag_off = htons(IP6_MF); fh->frag_off = htons(IP6_MF);
frag_id = fh->identification; fh->identification = frag_id;
first_len = skb_pagelen(skb); first_len = skb_pagelen(skb);
skb->data_len = first_len - skb_headlen(skb); skb->data_len = first_len - skb_headlen(skb);
...@@ -778,10 +780,6 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, ...@@ -778,10 +780,6 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
*/ */
fh->nexthdr = nexthdr; fh->nexthdr = nexthdr;
fh->reserved = 0; fh->reserved = 0;
if (!frag_id) {
ipv6_select_ident(net, fh, rt);
frag_id = fh->identification;
} else
fh->identification = frag_id; fh->identification = frag_id;
/* /*
...@@ -936,7 +934,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, ...@@ -936,7 +934,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
*/ */
rt = (struct rt6_info *) *dst; rt = (struct rt6_info *) *dst;
rcu_read_lock_bh(); rcu_read_lock_bh();
n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); n = __ipv6_neigh_lookup_noref(rt->dst.dev,
rt6_nexthop(rt, &fl6->daddr));
err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
rcu_read_unlock_bh(); rcu_read_unlock_bh();
...@@ -1060,11 +1059,10 @@ static inline int ip6_ufo_append_data(struct sock *sk, ...@@ -1060,11 +1059,10 @@ static inline int ip6_ufo_append_data(struct sock *sk,
int odd, struct sk_buff *skb), int odd, struct sk_buff *skb),
void *from, int length, int hh_len, int fragheaderlen, void *from, int length, int hh_len, int fragheaderlen,
int transhdrlen, int mtu, unsigned int flags, int transhdrlen, int mtu, unsigned int flags,
struct rt6_info *rt) const struct flowi6 *fl6)
{ {
struct sk_buff *skb; struct sk_buff *skb;
struct frag_hdr fhdr;
int err; int err;
/* There is support for UDP large send offload by network /* There is support for UDP large send offload by network
...@@ -1106,8 +1104,9 @@ static inline int ip6_ufo_append_data(struct sock *sk, ...@@ -1106,8 +1104,9 @@ static inline int ip6_ufo_append_data(struct sock *sk,
skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
sizeof(struct frag_hdr)) & ~7; sizeof(struct frag_hdr)) & ~7;
skb_shinfo(skb)->gso_type = SKB_GSO_UDP; skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
ipv6_select_ident(sock_net(sk), &fhdr, rt); skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
skb_shinfo(skb)->ip6_frag_id = fhdr.identification; &fl6->daddr,
&fl6->saddr);
append: append:
return skb_append_datato_frags(sk, skb, getfrag, from, return skb_append_datato_frags(sk, skb, getfrag, from,
...@@ -1332,7 +1331,7 @@ static int __ip6_append_data(struct sock *sk, ...@@ -1332,7 +1331,7 @@ static int __ip6_append_data(struct sock *sk,
(sk->sk_type == SOCK_DGRAM)) { (sk->sk_type == SOCK_DGRAM)) {
err = ip6_ufo_append_data(sk, queue, getfrag, from, length, err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, hh_len, fragheaderlen,
transhdrlen, mtu, flags, rt); transhdrlen, mtu, flags, fl6);
if (err) if (err)
goto error; goto error;
return 0; return 0;
......
...@@ -151,7 +151,7 @@ EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); ...@@ -151,7 +151,7 @@ EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
{ {
struct rt6_info *rt = (struct rt6_info *) dst; struct rt6_info *rt = (struct rt6_info *) dst;
t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; t->dst_cookie = rt6_get_cookie(rt);
dst_release(t->dst_cache); dst_release(t->dst_cache);
t->dst_cache = dst; t->dst_cache = dst;
} }
......
...@@ -1506,7 +1506,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) ...@@ -1506,7 +1506,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
"Redirect: destination is not a neighbour\n"); "Redirect: destination is not a neighbour\n");
goto release; goto release;
} }
peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1);
ret = inet_peer_xrlim_allow(peer, 1*HZ); ret = inet_peer_xrlim_allow(peer, 1*HZ);
if (peer) if (peer)
inet_putpeer(peer); inet_putpeer(peer);
......
...@@ -10,7 +10,8 @@ ...@@ -10,7 +10,8 @@
#include <net/secure_seq.h> #include <net/secure_seq.h>
static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
struct in6_addr *dst, struct in6_addr *src) const struct in6_addr *dst,
const struct in6_addr *src)
{ {
u32 hash, id; u32 hash, id;
...@@ -60,17 +61,17 @@ void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) ...@@ -60,17 +61,17 @@ void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
} }
EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr, u32 ipv6_select_ident(struct net *net,
struct rt6_info *rt) const struct in6_addr *daddr,
const struct in6_addr *saddr)
{ {
static u32 ip6_idents_hashrnd __read_mostly; static u32 ip6_idents_hashrnd __read_mostly;
u32 id; u32 id;
net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
id = __ipv6_select_ident(net, ip6_idents_hashrnd, &rt->rt6i_dst.addr, id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr);
&rt->rt6i_src.addr); return htonl(id);
fhdr->identification = htonl(id);
} }
EXPORT_SYMBOL(ipv6_select_ident); EXPORT_SYMBOL(ipv6_select_ident);
......
...@@ -865,6 +865,9 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ...@@ -865,6 +865,9 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_oif = np->ucast_oif; fl6.flowi6_oif = np->ucast_oif;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
if (inet->hdrincl)
fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH;
dst = ip6_dst_lookup_flow(sk, &fl6, final_p); dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) { if (IS_ERR(dst)) {
err = PTR_ERR(dst); err = PTR_ERR(dst);
......
This diff is collapsed.
...@@ -99,8 +99,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) ...@@ -99,8 +99,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
dst_hold(dst); dst_hold(dst);
sk->sk_rx_dst = dst; sk->sk_rx_dst = dst;
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
if (rt->rt6i_node) inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
} }
} }
...@@ -262,7 +261,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ...@@ -262,7 +261,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
rt = (struct rt6_info *) dst; rt = (struct rt6_info *) dst;
if (tcp_death_row.sysctl_tw_recycle && if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp && !tp->rx_opt.ts_recent_stamp &&
ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr)) ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr))
tcp_fetch_timewait_stamp(sk, dst); tcp_fetch_timewait_stamp(sk, dst);
icsk->icsk_ext_hdr_len = 0; icsk->icsk_ext_hdr_len = 0;
......
...@@ -76,8 +76,7 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, ...@@ -76,8 +76,7 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
{ {
if (dst->ops->family == AF_INET6) { if (dst->ops->family == AF_INET6) {
struct rt6_info *rt = (struct rt6_info *)dst; struct rt6_info *rt = (struct rt6_info *)dst;
if (rt->rt6i_node) path->path_cookie = rt6_get_cookie(rt);
path->path_cookie = rt->rt6i_node->fn_sernum;
} }
path->u.rt6.rt6i_nfheader_len = nfheader_len; path->u.rt6.rt6i_nfheader_len = nfheader_len;
...@@ -105,8 +104,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, ...@@ -105,8 +104,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
RTF_LOCAL); RTF_LOCAL);
xdst->u.rt6.rt6i_metric = rt->rt6i_metric; xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
xdst->u.rt6.rt6i_node = rt->rt6i_node; xdst->u.rt6.rt6i_node = rt->rt6i_node;
if (rt->rt6i_node) xdst->route_cookie = rt6_get_cookie(rt);
xdst->route_cookie = rt->rt6i_node->fn_sernum;
xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
xdst->u.rt6.rt6i_dst = rt->rt6i_dst; xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
xdst->u.rt6.rt6i_src = rt->rt6i_src; xdst->u.rt6.rt6i_src = rt->rt6i_src;
......
...@@ -364,13 +364,16 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, ...@@ -364,13 +364,16 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
static struct dst_entry * static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
struct in6_addr *ret_saddr, int do_xfrm) struct in6_addr *ret_saddr, int do_xfrm, int rt_mode)
{ {
struct dst_entry *dst; struct dst_entry *dst;
struct flowi6 fl6 = { struct flowi6 fl6 = {
.daddr = *daddr, .daddr = *daddr,
}; };
if (rt_mode & IP_VS_RT_MODE_KNOWN_NH)
fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
dst = ip6_route_output(net, NULL, &fl6); dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) if (dst->error)
goto out_err; goto out_err;
...@@ -427,7 +430,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, ...@@ -427,7 +430,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
} }
dst = __ip_vs_route_output_v6(net, &dest->addr.in6, dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
&dest_dst->dst_saddr.in6, &dest_dst->dst_saddr.in6,
do_xfrm); do_xfrm, rt_mode);
if (!dst) { if (!dst) {
__ip_vs_dst_set(dest, NULL, NULL, 0); __ip_vs_dst_set(dest, NULL, NULL, 0);
spin_unlock_bh(&dest->dst_lock); spin_unlock_bh(&dest->dst_lock);
...@@ -435,7 +438,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, ...@@ -435,7 +438,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
goto err_unreach; goto err_unreach;
} }
rt = (struct rt6_info *) dst; rt = (struct rt6_info *) dst;
cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; cookie = rt6_get_cookie(rt);
__ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
spin_unlock_bh(&dest->dst_lock); spin_unlock_bh(&dest->dst_lock);
IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
...@@ -446,7 +449,8 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, ...@@ -446,7 +449,8 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
*ret_saddr = dest_dst->dst_saddr.in6; *ret_saddr = dest_dst->dst_saddr.in6;
} else { } else {
noref = 0; noref = 0;
dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm,
rt_mode);
if (!dst) if (!dst)
goto err_unreach; goto err_unreach;
rt = (struct rt6_info *) dst; rt = (struct rt6_info *) dst;
...@@ -781,7 +785,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -781,7 +785,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* From world but DNAT to loopback address? */ /* From world but DNAT to loopback address? */
if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) {
IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
"ip_vs_nat_xmit_v6(): " "ip_vs_nat_xmit_v6(): "
"stopping DNAT to loopback address"); "stopping DNAT to loopback address");
...@@ -1164,7 +1168,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -1164,7 +1168,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
NULL, ipvsh, 0, NULL, ipvsh, 0,
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL); IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_KNOWN_NH);
if (local < 0) if (local < 0)
goto tx_error; goto tx_error;
if (local) { if (local) {
...@@ -1346,7 +1351,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -1346,7 +1351,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* From world but DNAT to loopback address? */ /* From world but DNAT to loopback address? */
if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) {
IP_VS_DBG(1, "%s(): " IP_VS_DBG(1, "%s(): "
"stopping DNAT to loopback %pI6\n", "stopping DNAT to loopback %pI6\n",
__func__, &cp->daddr.in6); __func__, &cp->daddr.in6);
......
...@@ -779,8 +779,8 @@ static int callforward_do_filter(struct net *net, ...@@ -779,8 +779,8 @@ static int callforward_do_filter(struct net *net,
flowi6_to_flowi(&fl1), false)) { flowi6_to_flowi(&fl1), false)) {
if (!afinfo->route(net, (struct dst_entry **)&rt2, if (!afinfo->route(net, (struct dst_entry **)&rt2,
flowi6_to_flowi(&fl2), false)) { flowi6_to_flowi(&fl2), false)) {
if (ipv6_addr_equal(rt6_nexthop(rt1), if (ipv6_addr_equal(rt6_nexthop(rt1, &fl1.daddr),
rt6_nexthop(rt2)) && rt6_nexthop(rt2, &fl2.daddr)) &&
rt1->dst.dev == rt2->dst.dev) rt1->dst.dev == rt2->dst.dev)
ret = 1; ret = 1;
dst_release(&rt2->dst); dst_release(&rt2->dst);
......
...@@ -152,6 +152,7 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) ...@@ -152,6 +152,7 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
fl6.daddr = info->gw.in6; fl6.daddr = info->gw.in6;
fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
(iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
dst = ip6_route_output(net, NULL, &fl6); dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) { if (dst->error) {
dst_release(dst); dst_release(dst);
......
...@@ -73,7 +73,7 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, ...@@ -73,7 +73,7 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
if (dev == NULL && rt->rt6i_flags & RTF_LOCAL) if (dev == NULL && rt->rt6i_flags & RTF_LOCAL)
ret |= XT_ADDRTYPE_LOCAL; ret |= XT_ADDRTYPE_LOCAL;
if (rt->rt6i_flags & RTF_ANYCAST) if (ipv6_anycast_destination((struct dst_entry *)rt, addr))
ret |= XT_ADDRTYPE_ANYCAST; ret |= XT_ADDRTYPE_ANYCAST;
dst_release(&rt->dst); dst_release(&rt->dst);
......
...@@ -331,8 +331,9 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, ...@@ -331,8 +331,9 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
rt = (struct rt6_info *)dst; rt = (struct rt6_info *)dst;
t->dst = dst; t->dst = dst;
t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; t->dst_cookie = rt6_get_cookie(rt);
pr_debug("rt6_dst:%pI6 rt6_src:%pI6\n", &rt->rt6i_dst.addr, pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n",
&rt->rt6i_dst.addr, rt->rt6i_dst.plen,
&fl6->saddr); &fl6->saddr);
} else { } else {
t->dst = NULL; t->dst = NULL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment