Commit 8c0c07a3 authored by David S. Miller

Merge branch 'vrf-next'

David Ahern says:

====================
net: vrf: Fix ipv6 source address selection

IPv6 source address selection is currently broken for several use cases,
such as unnumbered deployments with global addresses on the VRF device and
none on the enslaved devices.

Update the source address selection to consider the real output route as
opposed to the VRF route that sends packets to the VRF device first (i.e.,
implement get_saddr6 similar to the IPv4 method), and update the IPv6
address selection to consider L3 domains and to prefer addresses on the
VRF device.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 0023a061 afbac601
...@@ -999,6 +999,46 @@ static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev, ...@@ -999,6 +999,46 @@ static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
return dst; return dst;
} }
/* called under rcu_read_lock */
static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk,
struct flowi6 *fl6)
{
struct net *net = dev_net(dev);
struct dst_entry *dst;
struct rt6_info *rt;
int err;
if (rt6_need_strict(&fl6->daddr)) {
rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif,
RT6_LOOKUP_F_IFACE);
if (unlikely(!rt))
return 0;
dst = &rt->dst;
} else {
__u8 flags = fl6->flowi6_flags;
fl6->flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
fl6->flowi6_flags |= FLOWI_FLAG_SKIP_NH_OIF;
dst = ip6_route_output(net, sk, fl6);
rt = (struct rt6_info *)dst;
fl6->flowi6_flags = flags;
}
err = dst->error;
if (!err) {
err = ip6_route_get_saddr(net, rt, &fl6->daddr,
sk ? inet6_sk(sk)->srcprefs : 0,
&fl6->saddr);
}
dst_release(dst);
return err;
}
#endif #endif
static const struct l3mdev_ops vrf_l3mdev_ops = { static const struct l3mdev_ops vrf_l3mdev_ops = {
...@@ -1008,6 +1048,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = { ...@@ -1008,6 +1048,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = {
.l3mdev_l3_rcv = vrf_l3_rcv, .l3mdev_l3_rcv = vrf_l3_rcv,
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
.l3mdev_get_rt6_dst = vrf_get_rt6_dst, .l3mdev_get_rt6_dst = vrf_get_rt6_dst,
.l3mdev_get_saddr6 = vrf_get_saddr6,
#endif #endif
}; };
......
...@@ -18,6 +18,7 @@ struct route_info { ...@@ -18,6 +18,7 @@ struct route_info {
__u8 prefix[0]; /* 0,8 or 16 */ __u8 prefix[0]; /* 0,8 or 16 */
}; };
#include <net/addrconf.h>
#include <net/flow.h> #include <net/flow.h>
#include <net/ip6_fib.h> #include <net/ip6_fib.h>
#include <net/sock.h> #include <net/sock.h>
...@@ -88,9 +89,23 @@ int ip6_route_add(struct fib6_config *cfg); ...@@ -88,9 +89,23 @@ int ip6_route_add(struct fib6_config *cfg);
int ip6_ins_rt(struct rt6_info *); int ip6_ins_rt(struct rt6_info *);
int ip6_del_rt(struct rt6_info *); int ip6_del_rt(struct rt6_info *);
int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
const struct in6_addr *daddr, unsigned int prefs, const struct in6_addr *daddr,
struct in6_addr *saddr); unsigned int prefs,
struct in6_addr *saddr)
{
/* Choose the source address for @daddr and write it to @saddr.
 * @rt may be NULL; in that case no device is passed to the
 * address selection below.
 */
struct inet6_dev *idev =
rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
int err = 0;
/* a preferred source configured on the route is used as-is; otherwise
 * fall back to ipv6_dev_get_saddr() and return its result
 */
if (rt && rt->rt6i_prefsrc.plen)
*saddr = rt->rt6i_prefsrc.addr;
else
err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
daddr, prefs, saddr);
return err;
}
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
const struct in6_addr *saddr, int oif, int flags); const struct in6_addr *saddr, int oif, int flags);
......
...@@ -39,6 +39,9 @@ struct l3mdev_ops { ...@@ -39,6 +39,9 @@ struct l3mdev_ops {
/* IPv6 ops */ /* IPv6 ops */
struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev, struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev,
struct flowi6 *fl6); struct flowi6 *fl6);
int (*l3mdev_get_saddr6)(struct net_device *dev,
const struct sock *sk,
struct flowi6 *fl6);
}; };
#ifdef CONFIG_NET_L3_MASTER_DEV #ifdef CONFIG_NET_L3_MASTER_DEV
...@@ -76,6 +79,31 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) ...@@ -76,6 +79,31 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
return rc; return rc;
} }
/* Return the L3 master device associated with @_dev: @_dev itself when it
 * is an L3 master, its master upper device when it is an L3 slave, and
 * NULL otherwise (including when @_dev is NULL).
 */
static inline
const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
{
	/* netdev_master_upper_dev_get_rcu calls list_first_or_null_rcu to
	 * walk the upper dev list, and list_first_or_null_rcu does not
	 * handle a const arg. Nothing is modified here; the cast only
	 * drops the const so the master can be fetched from that list.
	 */
	struct net_device *dev = (struct net_device *)_dev;

	if (!dev)
		return NULL;

	if (netif_is_l3_master(dev))
		return dev;

	if (netif_is_l3_slave(dev))
		return netdev_master_upper_dev_get_rcu(dev);

	return NULL;
}
/* get index of an interface to use for FIB lookups. For devices /* get index of an interface to use for FIB lookups. For devices
* enslaved to an L3 master device FIB lookups are based on the * enslaved to an L3 master device FIB lookups are based on the
* master index * master index
...@@ -140,6 +168,8 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) ...@@ -140,6 +168,8 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4); int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4);
struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6); struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6);
int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
struct flowi6 *fl6);
static inline static inline
struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto)
...@@ -185,6 +215,12 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) ...@@ -185,6 +215,12 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
return 0; return 0;
} }
/* Stub variant: always reports that @dev has no L3 master device. */
static inline const struct net_device *
l3mdev_master_dev_rcu(const struct net_device *dev)
{
	return NULL;
}
static inline int l3mdev_fib_oif_rcu(struct net_device *dev) static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
{ {
return dev ? dev->ifindex : 0; return dev ? dev->ifindex : 0;
...@@ -230,6 +266,12 @@ struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6) ...@@ -230,6 +266,12 @@ struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6)
return NULL; return NULL;
} }
/* Stub variant: performs no source address selection, leaves @fl6
 * untouched and always reports success (0).
 */
static inline int
l3mdev_get_saddr6(struct net *net, const struct sock *sk, struct flowi6 *fl6)
{
	return 0;
}
static inline static inline
struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb) struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb)
{ {
......
...@@ -1524,6 +1524,28 @@ static int __ipv6_dev_get_saddr(struct net *net, ...@@ -1524,6 +1524,28 @@ static int __ipv6_dev_get_saddr(struct net *net,
return hiscore_idx; return hiscore_idx;
} }
/* Score the addresses on @dst_dev first and then on @master, carrying the
 * running best-score index through both passes. A device without an
 * inet6_dev is skipped. Returns the resulting high-score index.
 */
static int ipv6_get_saddr_master(struct net *net,
				 const struct net_device *dst_dev,
				 const struct net_device *master,
				 struct ipv6_saddr_dst *dst,
				 struct ipv6_saddr_score *scores,
				 int hiscore_idx)
{
	/* order matters: dst_dev is evaluated before the master */
	const struct net_device *order[2] = { dst_dev, master };
	int i;

	for (i = 0; i < 2; i++) {
		struct inet6_dev *idev = __in6_dev_get(order[i]);

		if (!idev)
			continue;

		hiscore_idx = __ipv6_dev_get_saddr(net, dst, idev,
						   scores, hiscore_idx);
	}

	return hiscore_idx;
}
int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
const struct in6_addr *daddr, unsigned int prefs, const struct in6_addr *daddr, unsigned int prefs,
struct in6_addr *saddr) struct in6_addr *saddr)
...@@ -1577,13 +1599,39 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, ...@@ -1577,13 +1599,39 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
if (idev) if (idev)
hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx); hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
} else { } else {
const struct net_device *master;
int master_idx = 0;
/* if dst_dev exists and is enslaved to an L3 device, then
* prefer addresses from dst_dev and then the master over
* any other enslaved devices in the L3 domain.
*/
master = l3mdev_master_dev_rcu(dst_dev);
if (master) {
master_idx = master->ifindex;
hiscore_idx = ipv6_get_saddr_master(net, dst_dev,
master, &dst,
scores, hiscore_idx);
if (scores[hiscore_idx].ifa)
goto out;
}
for_each_netdev_rcu(net, dev) { for_each_netdev_rcu(net, dev) {
/* only consider addresses on devices in the
* same L3 domain
*/
if (l3mdev_master_ifindex_rcu(dev) != master_idx)
continue;
idev = __in6_dev_get(dev); idev = __in6_dev_get(dev);
if (!idev) if (!idev)
continue; continue;
hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx); hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
} }
} }
out:
rcu_read_unlock(); rcu_read_unlock();
hiscore = &scores[hiscore_idx]; hiscore = &scores[hiscore_idx];
......
...@@ -910,6 +910,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, ...@@ -910,6 +910,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
int err; int err;
int flags = 0; int flags = 0;
if (ipv6_addr_any(&fl6->saddr) && fl6->flowi6_oif &&
(!*dst || !(*dst)->error)) {
err = l3mdev_get_saddr6(net, sk, fl6);
if (err)
goto out_err;
}
/* The correct way to handle this would be to do /* The correct way to handle this would be to do
* ip6_route_get_saddr, and then ip6_route_output; however, * ip6_route_get_saddr, and then ip6_route_output; however,
* the route-specific preferred source forces the * the route-specific preferred source forces the
...@@ -999,10 +1006,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, ...@@ -999,10 +1006,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
return 0; return 0;
out_err_release: out_err_release:
if (err == -ENETUNREACH)
IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
dst_release(*dst); dst_release(*dst);
*dst = NULL; *dst = NULL;
out_err:
if (err == -ENETUNREACH)
IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
return err; return err;
} }
......
...@@ -2586,23 +2586,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, ...@@ -2586,23 +2586,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
return rt; return rt;
} }
/* Choose the source address for @daddr and write it to @saddr.
 *
 * A preferred source configured on @rt is used as-is and 0 is returned.
 * Otherwise the result of ipv6_dev_get_saddr() is returned, called with
 * the device of the route's inet6_dev, or with a NULL device when @rt is
 * NULL or has no inet6_dev.
 */
int ip6_route_get_saddr(struct net *net,
			struct rt6_info *rt,
			const struct in6_addr *daddr,
			unsigned int prefs,
			struct in6_addr *saddr)
{
	const struct net_device *dev = NULL;

	if (rt) {
		struct inet6_dev *idev;

		if (rt->rt6i_prefsrc.plen) {
			*saddr = rt->rt6i_prefsrc.addr;
			return 0;
		}

		idev = ip6_dst_idev((struct dst_entry *)rt);
		if (idev)
			dev = idev->dev;
	}

	return ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr);
}
/* remove deleted ip from prefsrc entries */ /* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip { struct arg_dev_net_ip {
struct net_device *dev; struct net_device *dev;
......
...@@ -162,6 +162,30 @@ int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4) ...@@ -162,6 +162,30 @@ int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4)
} }
EXPORT_SYMBOL_GPL(l3mdev_get_saddr); EXPORT_SYMBOL_GPL(l3mdev_get_saddr);
/* Hand IPv6 source address selection for @fl6 to the L3 master device
 * behind fl6->flowi6_oif, if there is one and it implements the
 * l3mdev_get_saddr6 op.
 *
 * Returns the op's result, or 0 when no oif is set, the device does not
 * exist, it is not (enslaved to) an L3 master, or the op is not provided.
 */
int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
		      struct flowi6 *fl6)
{
	struct net_device *master;
	int err = 0;

	if (!fl6->flowi6_oif)
		return 0;

	rcu_read_lock();

	master = dev_get_by_index_rcu(net, fl6->flowi6_oif);
	/* an enslaved device is replaced by its master upper device */
	if (master && netif_is_l3_slave(master))
		master = netdev_master_upper_dev_get_rcu(master);

	if (!master || !netif_is_l3_master(master))
		goto unlock;

	if (master->l3mdev_ops->l3mdev_get_saddr6)
		err = master->l3mdev_ops->l3mdev_get_saddr6(master, sk, fl6);

unlock:
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL_GPL(l3mdev_get_saddr6);
/** /**
* l3mdev_fib_rule_match - Determine if flowi references an * l3mdev_fib_rule_match - Determine if flowi references an
* L3 master device * L3 master device
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment