Commit 14aba409 authored by Jay Vosburgh, committed by Tim Gardner

UBUNTU: SAUCE: fan: add VXLAN implementation

Generify the fan mapping support and utilise that to implement fan
mappings over vxlan transport.

Expose the existence of this functionality (when the module is loaded)
via an additional sysctl marker.
Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
[apw@canonical.com: added feature marker for fan over vxlan.]
Signed-off-by: Andy Whitcroft <apw@canonical.com>
parent cf71cf4c
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <linux/udp.h> #include <linux/udp.h>
#include <linux/igmp.h> #include <linux/igmp.h>
#include <linux/etherdevice.h> #include <linux/etherdevice.h>
#include <linux/inetdevice.h>
#include <linux/if_ether.h> #include <linux/if_ether.h>
#include <linux/if_vlan.h> #include <linux/if_vlan.h>
#include <linux/hash.h> #include <linux/hash.h>
...@@ -106,6 +107,167 @@ static inline bool vxlan_collect_metadata(struct vxlan_sock *vs) ...@@ -106,6 +107,167 @@ static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
ip_tunnel_collect_metadata(); ip_tunnel_collect_metadata();
} }
/*
 * Look up the fan mapping whose overlay network contains @daddr.
 *
 * @vxlan: device whose fan map list is searched
 * @daddr: IPv4 address in network byte order
 *
 * Each entry is tested by masking @daddr with that entry's own overlay
 * prefix and comparing against the stored overlay network; the first
 * entry that matches wins.
 *
 * Returns the matching entry, or NULL when no mapping covers @daddr.
 *
 * NOTE(review): the entry is handed back after rcu_read_unlock(), so its
 * lifetime is no longer covered by this read-side section once we return.
 * Presumably callers either hold RTNL or are themselves inside an RCU
 * read-side section -- confirm.
 */
static struct ip_fan_map *vxlan_fan_find_map(struct vxlan_dev *vxlan, __be32 daddr)
{
	struct ip_fan_map *match = NULL;
	struct ip_fan_map *entry;

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &vxlan->fan.fan_maps, list) {
		__be32 mask = inet_make_mask(entry->overlay_prefix);

		if ((daddr & mask) != entry->overlay)
			continue;

		match = entry;
		break;
	}
	rcu_read_unlock();

	return match;
}
/*
 * Remove every fan mapping from @vxlan.
 *
 * Each entry is unlinked with list_del_rcu() and released with
 * kfree_rcu(), so concurrent RCU readers that already hold a pointer to
 * an entry can finish with it before the memory is freed.
 *
 * This is an update-side walk that removes the node it is standing on,
 * so the successor is fetched before the current entry is queued for
 * freeing (list_for_each_entry_safe).  The RCU iterator is meant for
 * readers; using it here would read the ->next pointer of an entry
 * after it has already been passed to kfree_rcu(), without being inside
 * an RCU read-side section.
 *
 * NOTE(review): concurrent updaters are presumably serialized by RTNL on
 * the netlink path that reaches this function -- confirm.
 */
static void vxlan_fan_flush_map(struct vxlan_dev *vxlan)
{
	struct ip_fan_map *fan_map, *next;

	list_for_each_entry_safe(fan_map, next, &vxlan->fan.fan_maps, list) {
		list_del_rcu(&fan_map->list);
		kfree_rcu(fan_map, rcu);
	}
}
/*
 * Delete the fan mapping that covers @overlay.
 *
 * @vxlan:   device owning the fan map list
 * @overlay: overlay address in network byte order; the entry is located
 *           with vxlan_fan_find_map(), i.e. by prefix match
 *
 * The entry is unlinked with list_del_rcu() and freed via kfree_rcu().
 *
 * Returns 0 on success, -ENOENT when no mapping matches.
 */
static int vxlan_fan_del_map(struct vxlan_dev *vxlan, __be32 overlay)
{
	struct ip_fan_map *victim = vxlan_fan_find_map(vxlan, overlay);

	if (victim) {
		list_del_rcu(&victim->list);
		kfree_rcu(victim, rcu);
		return 0;
	}

	return -ENOENT;
}
static int vxlan_fan_add_map(struct vxlan_dev *vxlan, struct ifla_fan_map *map)
{
__be32 overlay_mask, underlay_mask;
struct ip_fan_map *fan_map;
overlay_mask = inet_make_mask(map->overlay_prefix);
underlay_mask = inet_make_mask(map->underlay_prefix);
netdev_dbg(vxlan->dev, "vfam: map: o %x/%d u %x/%d om %x um %x\n",
map->overlay, map->overlay_prefix,
map->underlay, map->underlay_prefix,
overlay_mask, underlay_mask);
if ((map->overlay & ~overlay_mask) || (map->underlay & ~underlay_mask))
return -EINVAL;
if (!(map->overlay & overlay_mask) && (map->underlay & underlay_mask))
return -EINVAL;
/* Special case: overlay 0 and underlay 0: flush all mappings */
if (!map->overlay && !map->underlay) {
vxlan_fan_flush_map(vxlan);
return 0;
}
/* Special case: overlay set and underlay 0: clear map for overlay */
if (!map->underlay)
return vxlan_fan_del_map(vxlan, map->overlay);
if (vxlan_fan_find_map(vxlan, map->overlay))
return -EEXIST;
fan_map = kmalloc(sizeof(*fan_map), GFP_KERNEL);
fan_map->underlay = map->underlay;
fan_map->overlay = map->overlay;
fan_map->underlay_prefix = map->underlay_prefix;
fan_map->overlay_mask = ntohl(overlay_mask);
fan_map->overlay_prefix = map->overlay_prefix;
list_add_tail_rcu(&fan_map->list, &vxlan->fan.fan_maps);
return 0;
}
static int vxlan_parse_fan_map(struct nlattr *data[], struct vxlan_dev *vxlan)
{
struct ifla_fan_map *map;
struct nlattr *attr;
int rem, rv;
nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) {
map = nla_data(attr);
rv = vxlan_fan_add_map(vxlan, map);
if (rv)
return rv;
}
return 0;
}
/*
 * Compute the underlay (outer) destination for a frame sent on a fan
 * enabled vxlan device and store it in @fan_rdst.
 *
 * @vxlan:    transmitting device; supplies the fan map list and the VNI
 * @skb:      frame being transmitted
 * @fan_rdst: output; zeroed and then filled with an AF_INET remote
 *            address and the device's default remote VNI
 *
 * The inner destination is taken from the IPv4 header for ETH_P_IP
 * frames and from the ARP payload for ETH_P_ARP frames.  Any other
 * ethertype is rejected.
 *
 * Returns 0 on success, -EINVAL when the ethertype is unsupported, no
 * fan mapping covers the destination, or the mapping's underlay is 0.
 *
 * NOTE(review): nothing in this function checks that the IP/ARP header
 * is fully present in the skb's linear data before it is read -- confirm
 * that callers guarantee this.
 */
static int vxlan_fan_build_rdst(struct vxlan_dev *vxlan, struct sk_buff *skb,
struct vxlan_rdst *fan_rdst)
{
struct ip_fan_map *f_map;
union vxlan_addr *va;
u32 daddr, underlay;
struct arphdr *arp;
void *arp_ptr;
struct ethhdr *eth;
struct iphdr *iph;
eth = eth_hdr(skb);
switch (eth->h_proto) {
case htons(ETH_P_IP):
iph = ip_hdr(skb);
if (!iph)
return -EINVAL;
/* Still network byte order here; converted after the map lookup. */
daddr = iph->daddr;
break;
case htons(ETH_P_ARP):
arp = arp_hdr(skb);
if (!arp)
return -EINVAL;
/*
 * The variable part of the ARP packet starts right after the fixed
 * header.  Per the debug format below it is read as: sender hardware
 * address, sender IP (4 bytes), target hardware address, target IP.
 * NOTE(review): hardware address length comes from skb->dev->addr_len
 * and the protocol address length is assumed to be 4; the ARP header's
 * own length fields are not consulted -- confirm this is intended.
 */
arp_ptr = arp + 1;
netdev_dbg(vxlan->dev,
"vfbr: arp sha %pM sip %pI4 tha %pM tip %pI4\n",
arp_ptr, arp_ptr + skb->dev->addr_len,
arp_ptr + skb->dev->addr_len + 4,
arp_ptr + (skb->dev->addr_len * 2) + 4);
/* Skip sha + sip + tha so arp_ptr lands on the target IP. */
arp_ptr += (skb->dev->addr_len * 2) + 4;
memcpy(&daddr, arp_ptr, 4);
break;
default:
netdev_dbg(vxlan->dev, "vfbr: unknown eth p %x\n", eth->h_proto);
return -EINVAL;
}
/* The lookup expects the address in network byte order. */
f_map = vxlan_fan_find_map(vxlan, daddr);
if (!f_map)
return -EINVAL;
/* From here on daddr and underlay are host byte order. */
daddr = ntohl(daddr);
underlay = ntohl(f_map->underlay);
if (!underlay)
return -EINVAL;
memset(fan_rdst, 0, sizeof(*fan_rdst));
va = &fan_rdst->remote_ip;
va->sa.sa_family = AF_INET;
fan_rdst->remote_vni = vxlan->default_dst.remote_vni;
/*
 * Take the bits of the inner destination below the overlay prefix and
 * shift them right by (underlay_prefix - overlay_prefix), then OR them
 * into the underlay network.  Worked example: overlay /8, underlay
 * 172.16.0.0/16, inner destination x.3.4.5 -> low 24 bits 0x030405,
 * shifted right by 8 -> 0x0304 -> outer destination 172.16.3.4.
 * NOTE(review): the shift count is only valid when
 * underlay_prefix >= overlay_prefix; that is not checked here.
 */
va->sin.sin_addr.s_addr = htonl(underlay |
((daddr & ~f_map->overlay_mask) >>
(32 - f_map->overlay_prefix -
(32 - f_map->underlay_prefix))));
/* daddr/underlay print in host order, the result in network order. */
netdev_dbg(vxlan->dev, "vfbr: daddr %x ul %x dst %x\n",
daddr, underlay, va->sin.sin_addr.s_addr);
return 0;
}
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
static inline static inline
bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b) bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
...@@ -2029,6 +2191,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -2029,6 +2191,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto rt_tx_error; goto rt_tx_error;
} }
if (fan_has_map(&vxlan->fan) && rt->rt_flags & RTCF_LOCAL) {
netdev_dbg(dev, "discard fan to localhost %pI4\n",
&dst->sin.sin_addr.s_addr);
ip_rt_put(rt);
goto tx_free;
}
/* Bypass encapsulation if the destination is local */ /* Bypass encapsulation if the destination is local */
if (rt->rt_flags & RTCF_LOCAL && if (rt->rt_flags & RTCF_LOCAL &&
!(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) { !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
...@@ -2169,6 +2338,20 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -2169,6 +2338,20 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK; return NETDEV_TX_OK;
} }
if (fan_has_map(&vxlan->fan)) {
struct vxlan_rdst fan_rdst;
netdev_dbg(vxlan->dev, "vxlan_xmit p %x d %pM\n",
eth->h_proto, eth->h_dest);
if (vxlan_fan_build_rdst(vxlan, skb, &fan_rdst)) {
dev->stats.tx_dropped++;
kfree_skb(skb);
return NETDEV_TX_OK;
}
vxlan_xmit_one(skb, dev, &fan_rdst, 0);
return NETDEV_TX_OK;
}
f = vxlan_find_mac(vxlan, eth->h_dest); f = vxlan_find_mac(vxlan, eth->h_dest);
did_rsc = false; did_rsc = false;
...@@ -2532,6 +2715,8 @@ static void vxlan_setup(struct net_device *dev) ...@@ -2532,6 +2715,8 @@ static void vxlan_setup(struct net_device *dev)
for (h = 0; h < FDB_HASH_SIZE; ++h) for (h = 0; h < FDB_HASH_SIZE; ++h)
INIT_HLIST_HEAD(&vxlan->fdb_head[h]); INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
INIT_LIST_HEAD(&vxlan->fan.fan_maps);
} }
static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
...@@ -2881,6 +3066,7 @@ EXPORT_SYMBOL_GPL(vxlan_dev_create); ...@@ -2881,6 +3066,7 @@ EXPORT_SYMBOL_GPL(vxlan_dev_create);
static int vxlan_newlink(struct net *src_net, struct net_device *dev, static int vxlan_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[]) struct nlattr *tb[], struct nlattr *data[])
{ {
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_config conf; struct vxlan_config conf;
int err; int err;
...@@ -2899,6 +3085,12 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, ...@@ -2899,6 +3085,12 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
conf.remote_ip.sa.sa_family = AF_INET6; conf.remote_ip.sa.sa_family = AF_INET6;
} }
if (data[IFLA_VXLAN_FAN_MAP]) {
err = vxlan_parse_fan_map(data, vxlan);
if (err)
return err;
}
if (data[IFLA_VXLAN_LOCAL]) { if (data[IFLA_VXLAN_LOCAL]) {
conf.saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]); conf.saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);
conf.saddr.sa.sa_family = AF_INET; conf.saddr.sa.sa_family = AF_INET;
...@@ -3037,6 +3229,7 @@ static size_t vxlan_get_size(const struct net_device *dev) ...@@ -3037,6 +3229,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */
nla_total_size(sizeof(struct ip_fan_map) * 256) +
0; 0;
} }
...@@ -3083,6 +3276,26 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) ...@@ -3083,6 +3276,26 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
} }
} }
if (fan_has_map(&vxlan->fan)) {
struct nlattr *fan_nest;
struct ip_fan_map *fan_map;
fan_nest = nla_nest_start(skb, IFLA_VXLAN_FAN_MAP);
if (!fan_nest)
goto nla_put_failure;
list_for_each_entry_rcu(fan_map, &vxlan->fan.fan_maps, list) {
struct ifla_fan_map map;
map.underlay = fan_map->underlay;
map.underlay_prefix = fan_map->underlay_prefix;
map.overlay = fan_map->overlay;
map.overlay_prefix = fan_map->overlay_prefix;
if (nla_put(skb, IFLA_FAN_MAPPING, sizeof(map), &map))
goto nla_put_failure;
}
nla_nest_end(skb, fan_nest);
}
if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) || if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) ||
nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) || nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
nla_put_u8(skb, IFLA_VXLAN_LEARNING, nla_put_u8(skb, IFLA_VXLAN_LEARNING,
...@@ -3201,6 +3414,22 @@ static __net_init int vxlan_init_net(struct net *net) ...@@ -3201,6 +3414,22 @@ static __net_init int vxlan_init_net(struct net *net)
return 0; return 0;
} }
#ifdef CONFIG_SYSCTL
/* Handle returned when the table below is registered; kept for unregistration. */
static struct ctl_table_header *vxlan_fan_header;
/* Value reported by the marker entry. */
static unsigned int vxlan_fan_version = 4;
/*
 * Feature marker advertising fan-over-vxlan support.  The table is
 * registered under "net/fan" by vxlan_init_module(), so the single
 * entry appears as net/fan/vxlan.
 */
static struct ctl_table vxlan_fan_sysctls[] = {
{
.procname = "vxlan",
.data = &vxlan_fan_version,
.maxlen = sizeof(vxlan_fan_version),
/* Read-only for everyone. */
.mode = 0444,
/* NOTE(review): data is unsigned int but the handler is the signed-int one -- confirm intended. */
.proc_handler = proc_dointvec,
},
/* Empty terminating entry. */
{},
};
#endif /* CONFIG_SYSCTL */
static void __net_exit vxlan_exit_net(struct net *net) static void __net_exit vxlan_exit_net(struct net *net)
{ {
struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_net *vn = net_generic(net, vxlan_net_id);
...@@ -3256,7 +3485,20 @@ static int __init vxlan_init_module(void) ...@@ -3256,7 +3485,20 @@ static int __init vxlan_init_module(void)
if (rc) if (rc)
goto out3; goto out3;
#ifdef CONFIG_SYSCTL
vxlan_fan_header = register_net_sysctl(&init_net, "net/fan",
vxlan_fan_sysctls);
if (!vxlan_fan_header) {
rc = -ENOMEM;
goto sysctl_failed;
}
#endif /* CONFIG_SYSCTL */
return 0; return 0;
#ifdef CONFIG_SYSCTL
sysctl_failed:
rtnl_link_unregister(&vxlan_link_ops);
#endif /* CONFIG_SYSCTL */
out3: out3:
unregister_netdevice_notifier(&vxlan_notifier_block); unregister_netdevice_notifier(&vxlan_notifier_block);
out2: out2:
...@@ -3269,6 +3511,9 @@ late_initcall(vxlan_init_module); ...@@ -3269,6 +3511,9 @@ late_initcall(vxlan_init_module);
static void __exit vxlan_cleanup_module(void) static void __exit vxlan_cleanup_module(void)
{ {
#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(vxlan_fan_header);
#endif /* CONFIG_SYSCTL */
rtnl_link_unregister(&vxlan_link_ops); rtnl_link_unregister(&vxlan_link_ops);
unregister_netdevice_notifier(&vxlan_notifier_block); unregister_netdevice_notifier(&vxlan_notifier_block);
destroy_workqueue(vxlan_wq); destroy_workqueue(vxlan_wq);
......
...@@ -100,9 +100,18 @@ struct metadata_dst; ...@@ -100,9 +100,18 @@ struct metadata_dst;
*/ */
#define FAN_OVERLAY_CNT 256 #define FAN_OVERLAY_CNT 256
/* One overlay -> underlay fan mapping, linked on ip_tunnel_fan.fan_maps. */
struct ip_fan_map {
/* Underlay network address, network byte order. */
__be32 underlay;
/* Overlay network address, network byte order. */
__be32 overlay;
/* Underlay prefix length in bits. */
u16 underlay_prefix;
/* Overlay prefix length in bits. */
u16 overlay_prefix;
/* Overlay netmask; the vxlan code stores it in host byte order (ntohl). */
u32 overlay_mask;
/* Linkage into the per-tunnel map list; the vxlan code adds and removes entries with the RCU list helpers. */
struct list_head list;
/* Callback head used by kfree_rcu() when the entry is released. */
struct rcu_head rcu;
};
struct ip_tunnel_fan { struct ip_tunnel_fan {
/* u32 __rcu *map;*/ struct list_head fan_maps;
u32 map[FAN_OVERLAY_CNT];
}; };
struct ip_tunnel { struct ip_tunnel {
...@@ -157,7 +166,11 @@ struct ip_tunnel { ...@@ -157,7 +166,11 @@ struct ip_tunnel {
#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) #define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)
#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT) #define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
#define TUNNEL_FAN __cpu_to_be16(0x4000)
/*
 * Report whether any fan mapping is configured.
 *
 * @fan: per-tunnel fan state
 *
 * Returns 1 when the mapping list holds at least one entry, 0 when it
 * is empty.
 */
static inline int fan_has_map(const struct ip_tunnel_fan *fan)
{
	if (list_empty(&fan->fan_maps))
		return 0;

	return 1;
}
struct tnl_ptk_info { struct tnl_ptk_info {
__be16 flags; __be16 flags;
......
...@@ -161,6 +161,8 @@ struct vxlan_dev { ...@@ -161,6 +161,8 @@ struct vxlan_dev {
struct vxlan_rdst default_dst; /* default destination */ struct vxlan_rdst default_dst; /* default destination */
u32 flags; /* VXLAN_F_* in vxlan.h */ u32 flags; /* VXLAN_F_* in vxlan.h */
struct ip_tunnel_fan fan;
struct timer_list age_timer; struct timer_list age_timer;
spinlock_t hash_lock; spinlock_t hash_lock;
unsigned int addrcnt; unsigned int addrcnt;
......
...@@ -443,6 +443,7 @@ enum { ...@@ -443,6 +443,7 @@ enum {
IFLA_VXLAN_GBP, IFLA_VXLAN_GBP,
IFLA_VXLAN_REMCSUM_NOPARTIAL, IFLA_VXLAN_REMCSUM_NOPARTIAL,
IFLA_VXLAN_COLLECT_METADATA, IFLA_VXLAN_COLLECT_METADATA,
IFLA_VXLAN_FAN_MAP = 33,
__IFLA_VXLAN_MAX __IFLA_VXLAN_MAX
}; };
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
......
...@@ -145,7 +145,7 @@ enum { ...@@ -145,7 +145,7 @@ enum {
#define IFLA_FAN_MAX (__IFLA_FAN_MAX - 1) #define IFLA_FAN_MAX (__IFLA_FAN_MAX - 1)
struct ip_tunnel_fan_map { struct ifla_fan_map {
__be32 underlay; __be32 underlay;
__be32 overlay; __be32 overlay;
__u16 underlay_prefix; __u16 underlay_prefix;
......
...@@ -1110,11 +1110,6 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], ...@@ -1110,11 +1110,6 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
} }
EXPORT_SYMBOL_GPL(ip_tunnel_newlink); EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
static int ip_tunnel_is_fan(struct ip_tunnel *tunnel)
{
return tunnel->parms.i_flags & TUNNEL_FAN;
}
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p) struct ip_tunnel_parm *p)
{ {
...@@ -1124,7 +1119,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], ...@@ -1124,7 +1119,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
if (dev == itn->fb_tunnel_dev) if (dev == itn->fb_tunnel_dev)
return ip_tunnel_is_fan(tunnel) ? 0 : -EINVAL; return fan_has_map(&tunnel->fan) ? 0 : -EINVAL;
t = ip_tunnel_find(itn, p, dev->type); t = ip_tunnel_find(itn, p, dev->type);
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment