Commit e64058be authored by Jay Vosburgh, committed by Tim Gardner

UBUNTU: SAUCE: fan: tunnel multiple mapping mode (v3)

Switch to a single tunnel for all mappings, this removes the limitations
on how many mappings each tunnel can handle, and therefore how many Fan
slices each local address may hold.

NOTE: This introduces a new kernel netlink interface which needs updated
iproute2 support.

BugLink: http://bugs.launchpad.net/bugs/1470091
Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Tim Gardner <tim.gardner@canonical.com>

Conflicts:
	include/net/ip_tunnels.h
parent cd22ccd2
...@@ -91,6 +91,19 @@ struct ip_tunnel_dst { ...@@ -91,6 +91,19 @@ struct ip_tunnel_dst {
}; };
struct metadata_dst; struct metadata_dst;
/* A fan overlay /8 (250.0.0.0/8, for example) maps to exactly one /16
 * underlay (10.88.0.0/16, for example). Multiple local addresses within
 * the /16 may be used, but a particular overlay may not span
 * multiple underlay subnets.
 *
 * We store one underlay, indexed by the overlay's high order octet.
 */
/* Number of map slots: one per possible value of the overlay's first octet. */
#define FAN_OVERLAY_CNT 256
/* Per-tunnel fan state: the overlay-to-underlay lookup table. */
struct ip_tunnel_fan {
/* map[i] is the underlay network, in host byte order, for the overlay whose
 * first octet is i; a value of 0 means no mapping is configured for i.
 * NOTE(review): an RCU-managed pointer (u32 __rcu *map) was evidently
 * considered here instead of the fixed array - confirm whether readers on
 * the transmit path need protection against concurrent updates.
 */
u32 map[FAN_OVERLAY_CNT];
};
struct ip_tunnel { struct ip_tunnel {
struct ip_tunnel __rcu *next; struct ip_tunnel __rcu *next;
...@@ -123,6 +136,7 @@ struct ip_tunnel { ...@@ -123,6 +136,7 @@ struct ip_tunnel {
#endif #endif
struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */
unsigned int prl_count; /* # of entries in PRL */ unsigned int prl_count; /* # of entries in PRL */
struct ip_tunnel_fan fan;
int ip_tnl_net_id; int ip_tnl_net_id;
struct gro_cells gro_cells; struct gro_cells gro_cells;
bool collect_md; bool collect_md;
...@@ -143,6 +157,7 @@ struct ip_tunnel { ...@@ -143,6 +157,7 @@ struct ip_tunnel {
#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) #define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)
#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT) #define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
#define TUNNEL_FAN __cpu_to_be16(0x4000)
struct tnl_ptk_info { struct tnl_ptk_info {
__be16 flags; __be16 flags;
......
...@@ -57,6 +57,10 @@ enum { ...@@ -57,6 +57,10 @@ enum {
IFLA_IPTUN_ENCAP_FLAGS, IFLA_IPTUN_ENCAP_FLAGS,
IFLA_IPTUN_ENCAP_SPORT, IFLA_IPTUN_ENCAP_SPORT,
IFLA_IPTUN_ENCAP_DPORT, IFLA_IPTUN_ENCAP_DPORT,
__IFLA_IPTUN_VENDOR_BREAK, /* Ensure new entries do not hit the below. */
IFLA_IPTUN_FAN_MAP = 33,
__IFLA_IPTUN_MAX, __IFLA_IPTUN_MAX,
}; };
#define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1)
...@@ -132,4 +136,20 @@ enum { ...@@ -132,4 +136,20 @@ enum {
}; };
#define IFLA_VTI_MAX (__IFLA_VTI_MAX - 1) #define IFLA_VTI_MAX (__IFLA_VTI_MAX - 1)
/* Attribute types carried inside the nested IFLA_IPTUN_FAN_MAP attribute. */
enum {
IFLA_FAN_UNSPEC,
IFLA_FAN_MAPPING, /* payload is one struct ip_tunnel_fan_map */
__IFLA_FAN_MAX,
};
/* Highest valid IFLA_FAN_* attribute type. */
#define IFLA_FAN_MAX (__IFLA_FAN_MAX - 1)
/*
 * One fan mapping as exchanged with userspace in an IFLA_FAN_MAPPING
 * attribute. The ipip code converts both addresses with ntohl() before
 * use and accepts only an underlay prefix of 16 (or 0) and an overlay
 * prefix of 8 (or 0).
 */
struct ip_tunnel_fan_map {
__be32 underlay; /* underlay network address, network byte order */
__be32 overlay; /* overlay network address, network byte order */
__u16 underlay_prefix; /* underlay prefix length in bits */
__u16 overlay_prefix; /* overlay prefix length in bits */
};
#endif /* _UAPI_IF_TUNNEL_H_ */ #endif /* _UAPI_IF_TUNNEL_H_ */
...@@ -1110,6 +1110,11 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], ...@@ -1110,6 +1110,11 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
} }
EXPORT_SYMBOL_GPL(ip_tunnel_newlink); EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
/* Return nonzero when TUNNEL_FAN is set in the tunnel's i_flags, 0 otherwise. */
static int ip_tunnel_is_fan(struct ip_tunnel *tunnel)
{
	int fan_bit = tunnel->parms.i_flags & TUNNEL_FAN;

	return fan_bit;
}
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p) struct ip_tunnel_parm *p)
{ {
...@@ -1119,7 +1124,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], ...@@ -1119,7 +1124,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
if (dev == itn->fb_tunnel_dev) if (dev == itn->fb_tunnel_dev)
return -EINVAL; return ip_tunnel_is_fan(tunnel) ? 0 : -EINVAL;
t = ip_tunnel_find(itn, p, dev->type); t = ip_tunnel_find(itn, p, dev->type);
......
...@@ -107,6 +107,7 @@ ...@@ -107,6 +107,7 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h> #include <linux/if_ether.h>
#include <linux/inetdevice.h>
#include <net/sock.h> #include <net/sock.h>
#include <net/ip.h> #include <net/ip.h>
...@@ -208,6 +209,40 @@ static int ipip_rcv(struct sk_buff *skb) ...@@ -208,6 +209,40 @@ static int ipip_rcv(struct sk_buff *skb)
return 0; return 0;
} }
/*
 * Test whether this tunnel has been switched into fan mode.
 * Returns the masked TUNNEL_FAN bit of parms.i_flags (nonzero when set).
 */
static int ipip_tunnel_is_fan(struct ip_tunnel *tunnel)
{
	int masked;

	masked = tunnel->parms.i_flags & TUNNEL_FAN;
	return masked;
}
/*
 * Build the outer IP header for a packet sent through a fan tunnel.
 *
 * The outer destination is derived from the inner destination address.
 * For an overlay (inner) address Y.A.B.C the underlay endpoint is F.G.A.B:
 * "Y" is the overlay's first octet and selects the configured underlay
 * network, "F" and "G" are the first two octets of that underlay /16, and
 * "A" and "B" become the host portion of the underlay address.
 *
 * Example: underlay host 10.88.3.4 with overlay 99 hosts overlay subnet
 * 99.3.4.0/24. A datagram from 99.3.4.5 to 99.6.7.8 is sent to underlay
 * host 10.88.6.7, which hosts overlay subnet 99.6.7.0/24.
 *
 * On success *iph is a copy of the tunnel's template header with daddr
 * replaced, and 0 is returned. Returns -EINVAL (leaving *iph untouched)
 * when no underlay is mapped for the overlay's first octet.
 */
static int ipip_build_fan_iphdr(struct ip_tunnel *tunnel, struct sk_buff *skb, struct iphdr *iph)
{
	const u32 inner_dst = ntohl(ip_hdr(skb)->daddr);
	const unsigned int slot = inner_dst >> 24;
	const u32 underlay_net = tunnel->fan.map[slot];
	u32 host_bits;

	if (underlay_net == 0)
		return -EINVAL;

	/* Octets A.B of the overlay address form the underlay host part. */
	host_bits = (inner_dst >> 8) & 0x0000ffff;

	*iph = tunnel->parms.iph;
	iph->daddr = htonl(underlay_net | host_bits);

	return 0;
}
/* /*
* This function assumes it is being called from dev_queue_xmit() * This function assumes it is being called from dev_queue_xmit()
* and that skb is filled properly by that function. * and that skb is filled properly by that function.
...@@ -215,7 +250,8 @@ static int ipip_rcv(struct sk_buff *skb) ...@@ -215,7 +250,8 @@ static int ipip_rcv(struct sk_buff *skb)
static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{ {
struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tiph = &tunnel->parms.iph; const struct iphdr *tiph;
struct iphdr fiph;
if (unlikely(skb->protocol != htons(ETH_P_IP))) if (unlikely(skb->protocol != htons(ETH_P_IP)))
goto tx_error; goto tx_error;
...@@ -224,6 +260,14 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -224,6 +260,14 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (IS_ERR(skb)) if (IS_ERR(skb))
goto out; goto out;
if (ipip_tunnel_is_fan(tunnel)) {
if (ipip_build_fan_iphdr(tunnel, skb, &fiph))
goto tx_error;
tiph = &fiph;
} else {
tiph = &tunnel->parms.iph;
}
skb_set_inner_ipproto(skb, IPPROTO_IPIP); skb_set_inner_ipproto(skb, IPPROTO_IPIP);
ip_tunnel_xmit(skb, dev, tiph, tiph->protocol); ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
...@@ -375,21 +419,88 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[], ...@@ -375,21 +419,88 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[],
return ret; return ret;
} }
static void ipip_fan_free_map(struct ip_tunnel *t)
{
memset(&t->fan.map, 0, sizeof(t->fan.map));
}
/*
 * Validate one userspace-supplied fan mapping and install it on tunnel t.
 *
 * A valid request marks the tunnel as a fan tunnel (TUNNEL_FAN). If both
 * the overlay and underlay addresses are zero, all mappings are cleared;
 * otherwise the underlay (host byte order) is stored in the slot selected
 * by the overlay's leading overlay_prefix bits.
 *
 * Returns 0 on success, or -EINVAL when a prefix length is unsupported,
 * an address has bits set outside its prefix, or an underlay is given
 * for an all-zero overlay network.
 */
static int ipip_fan_set_map(struct ip_tunnel *t, struct ip_tunnel_fan_map *map)
{
	u32 ovl, ovl_mask, und, und_mask;

	/* Only a /16 underlay and a /8 overlay are accepted; 0 is let through. */
	if (map->underlay_prefix != 0 && map->underlay_prefix != 16)
		return -EINVAL;
	if (map->overlay_prefix != 0 && map->overlay_prefix != 8)
		return -EINVAL;

	ovl_mask = ntohl(inet_make_mask(map->overlay_prefix));
	und_mask = ntohl(inet_make_mask(map->underlay_prefix));
	ovl = ntohl(map->overlay);
	und = ntohl(map->underlay);

	/* Neither address may carry bits beyond its prefix. */
	if ((ovl & ~ovl_mask) != 0)
		return -EINVAL;
	if ((und & ~und_mask) != 0)
		return -EINVAL;

	/* A non-zero underlay cannot be bound to a zero overlay network. */
	if ((ovl & ovl_mask) == 0 && (und & und_mask) != 0)
		return -EINVAL;

	t->parms.i_flags |= TUNNEL_FAN;

	/* Special case: overlay 0 and underlay 0 clears all mappings */
	if (ovl == 0 && und == 0)
		ipip_fan_free_map(t);
	else
		t->fan.map[ovl >> (32 - map->overlay_prefix)] = und;

	return 0;
}
/*
 * Apply the fan mappings carried in the IFLA_IPTUN_FAN_MAP netlink
 * attribute to tunnel t.
 *
 * @data:  parsed IFLA_IPTUN_* attribute table for the request
 * @t:     tunnel whose fan map is updated
 * @parms: tunnel parameters parsed from the same request
 *
 * Returns 0 when the attribute is absent or every nested entry was
 * applied. Returns -EINVAL when a fixed remote address (iph.daddr) is
 * set, when a nested entry is too short to hold a
 * struct ip_tunnel_fan_map, or when ipip_fan_set_map() rejects an entry.
 * Entries applied before a failing one are left in place.
 */
static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t,
			    struct ip_tunnel_parm *parms)
{
	struct ip_tunnel_fan_map *map;
	struct nlattr *attr;
	int rem, rv;

	if (!data[IFLA_IPTUN_FAN_MAP])
		return 0;

	/* Fan mode derives the remote per packet; a fixed daddr conflicts. */
	if (parms->iph.daddr)
		return -EINVAL;

	nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) {
		/*
		 * The nested payload comes from userspace and its length is
		 * not covered by the outer policy; refuse short attributes
		 * rather than reading past the end of the payload.
		 */
		if (nla_len(attr) < (int)sizeof(*map))
			return -EINVAL;

		map = nla_data(attr);
		rv = ipip_fan_set_map(t, map);
		if (rv)
			return rv;
	}

	return 0;
}
static int ipip_newlink(struct net *src_net, struct net_device *dev, static int ipip_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[]) struct nlattr *tb[], struct nlattr *data[])
{ {
struct ip_tunnel_parm p; struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap; struct ip_tunnel_encap ipencap;
struct ip_tunnel *t = netdev_priv(dev);
int err;
if (ipip_netlink_encap_parms(data, &ipencap)) { if (ipip_netlink_encap_parms(data, &ipencap)) {
struct ip_tunnel *t = netdev_priv(dev); err = ip_tunnel_encap_setup(t, &ipencap);
int err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0) if (err < 0)
return err; return err;
} }
ipip_netlink_parms(data, &p); ipip_netlink_parms(data, &p);
err = ipip_netlink_fan(data, t, &p);
if (err < 0)
return err;
return ip_tunnel_newlink(dev, tb, &p); return ip_tunnel_newlink(dev, tb, &p);
} }
...@@ -398,16 +509,20 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], ...@@ -398,16 +509,20 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
{ {
struct ip_tunnel_parm p; struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap; struct ip_tunnel_encap ipencap;
struct ip_tunnel *t = netdev_priv(dev);
int err;
if (ipip_netlink_encap_parms(data, &ipencap)) { if (ipip_netlink_encap_parms(data, &ipencap)) {
struct ip_tunnel *t = netdev_priv(dev); err = ip_tunnel_encap_setup(t, &ipencap);
int err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0) if (err < 0)
return err; return err;
} }
ipip_netlink_parms(data, &p); ipip_netlink_parms(data, &p);
err = ipip_netlink_fan(data, t, &p);
if (err < 0)
return err;
if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) || if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
(!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
...@@ -439,6 +554,8 @@ static size_t ipip_get_size(const struct net_device *dev) ...@@ -439,6 +554,8 @@ static size_t ipip_get_size(const struct net_device *dev)
nla_total_size(2) + nla_total_size(2) +
/* IFLA_IPTUN_ENCAP_DPORT */ /* IFLA_IPTUN_ENCAP_DPORT */
nla_total_size(2) + nla_total_size(2) +
/* IFLA_IPTUN_FAN_MAP */
nla_total_size(sizeof(struct ip_tunnel_fan_map)) * 256 +
0; 0;
} }
...@@ -466,6 +583,29 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) ...@@ -466,6 +583,29 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
tunnel->encap.flags)) tunnel->encap.flags))
goto nla_put_failure; goto nla_put_failure;
if (tunnel->parms.i_flags & TUNNEL_FAN) {
struct nlattr *fan_nest;
int i;
fan_nest = nla_nest_start(skb, IFLA_IPTUN_FAN_MAP);
if (!fan_nest)
goto nla_put_failure;
for (i = 0; i < 256; i++) {
if (tunnel->fan.map[i]) {
struct ip_tunnel_fan_map map;
map.underlay = htonl(tunnel->fan.map[i]);
map.underlay_prefix = 16;
map.overlay = htonl(i << 24);
map.overlay_prefix = 8;
if (nla_put(skb, IFLA_FAN_MAPPING,
sizeof(map), &map))
goto nla_put_failure;
}
}
nla_nest_end(skb, fan_nest);
}
return 0; return 0;
nla_put_failure: nla_put_failure:
...@@ -483,6 +623,9 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = { ...@@ -483,6 +623,9 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
[__IFLA_IPTUN_VENDOR_BREAK ... IFLA_IPTUN_MAX] = { .type = NLA_BINARY },
[IFLA_IPTUN_FAN_MAP] = { .type = NLA_NESTED },
}; };
static struct rtnl_link_ops ipip_link_ops __read_mostly = { static struct rtnl_link_ops ipip_link_ops __read_mostly = {
...@@ -523,6 +666,23 @@ static struct pernet_operations ipip_net_ops = { ...@@ -523,6 +666,23 @@ static struct pernet_operations ipip_net_ops = {
.size = sizeof(struct ip_tunnel_net), .size = sizeof(struct ip_tunnel_net),
}; };
#ifdef CONFIG_SYSCTL
static struct ctl_table_header *ipip_fan_header;
static unsigned int ipip_fan_version = 3;
static struct ctl_table ipip_fan_sysctls[] = {
{
.procname = "version",
.data = &ipip_fan_version,
.maxlen = sizeof(ipip_fan_version),
.mode = 0444,
.proc_handler = proc_dointvec,
},
{},
};
#endif /* CONFIG_SYSCTL */
static int __init ipip_init(void) static int __init ipip_init(void)
{ {
int err; int err;
...@@ -541,9 +701,22 @@ static int __init ipip_init(void) ...@@ -541,9 +701,22 @@ static int __init ipip_init(void)
if (err < 0) if (err < 0)
goto rtnl_link_failed; goto rtnl_link_failed;
#ifdef CONFIG_SYSCTL
ipip_fan_header = register_net_sysctl(&init_net, "net/fan",
ipip_fan_sysctls);
if (!ipip_fan_header) {
err = -ENOMEM;
goto sysctl_failed;
}
#endif /* CONFIG_SYSCTL */
out: out:
return err; return err;
#ifdef CONFIG_SYSCTL
sysctl_failed:
rtnl_link_unregister(&ipip_link_ops);
#endif /* CONFIG_SYSCTL */
rtnl_link_failed: rtnl_link_failed:
xfrm4_tunnel_deregister(&ipip_handler, AF_INET); xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
xfrm_tunnel_failed: xfrm_tunnel_failed:
...@@ -553,6 +726,9 @@ static int __init ipip_init(void) ...@@ -553,6 +726,9 @@ static int __init ipip_init(void)
static void __exit ipip_fini(void) static void __exit ipip_fini(void)
{ {
#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(ipip_fan_header);
#endif /* CONFIG_SYSCTL */
rtnl_link_unregister(&ipip_link_ops); rtnl_link_unregister(&ipip_link_ops);
if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
pr_info("%s: can't deregister tunnel\n", __func__); pr_info("%s: can't deregister tunnel\n", __func__);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment