Commit f01ec1c0 authored by Nicolas Dichtel, committed by David S. Miller

vxlan: add x-netns support

This patch allows switching the netns when a packet is encapsulated or
decapsulated.
The vxlan socket is opened in the i/o netns, i.e. the netns where
encapsulated packets are received. The socket lookup is done in this netns to
find the corresponding vxlan tunnel. After decapsulation, the packet is
injected into the corresponding interface, which may reside in another netns.

When one of the two netns is removed, the tunnel is destroyed.

Configuration example:
ip netns add netns1
ip netns exec netns1 ip link set lo up
ip link add vxlan10 type vxlan id 10 group 239.0.0.10 dev eth0 dstport 0
ip link set vxlan10 netns netns1
ip netns exec netns1 ip addr add 192.168.0.249/24 broadcast 192.168.0.255 dev vxlan10
ip netns exec netns1 ip link set vxlan10 up
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 6afc0d7a
...@@ -127,6 +127,7 @@ struct vxlan_dev { ...@@ -127,6 +127,7 @@ struct vxlan_dev {
struct list_head next; /* vxlan's per namespace list */ struct list_head next; /* vxlan's per namespace list */
struct vxlan_sock *vn_sock; /* listening socket */ struct vxlan_sock *vn_sock; /* listening socket */
struct net_device *dev; struct net_device *dev;
struct net *net; /* netns for packet i/o */
struct vxlan_rdst default_dst; /* default destination */ struct vxlan_rdst default_dst; /* default destination */
union vxlan_addr saddr; /* source address */ union vxlan_addr saddr; /* source address */
__be16 dst_port; __be16 dst_port;
...@@ -1203,6 +1204,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, ...@@ -1203,6 +1204,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
remote_ip = &vxlan->default_dst.remote_ip; remote_ip = &vxlan->default_dst.remote_ip;
skb_reset_mac_header(skb); skb_reset_mac_header(skb);
skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
skb->protocol = eth_type_trans(skb, vxlan->dev); skb->protocol = eth_type_trans(skb, vxlan->dev);
/* Ignore packet loops (and multicast echo) */ /* Ignore packet loops (and multicast echo) */
...@@ -1618,7 +1620,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, ...@@ -1618,7 +1620,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
struct dst_entry *dst, struct sk_buff *skb, struct dst_entry *dst, struct sk_buff *skb,
struct net_device *dev, struct in6_addr *saddr, struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr, __u8 prio, __u8 ttl, struct in6_addr *daddr, __u8 prio, __u8 ttl,
__be16 src_port, __be16 dst_port, __be32 vni) __be16 src_port, __be16 dst_port, __be32 vni,
bool xnet)
{ {
struct ipv6hdr *ip6h; struct ipv6hdr *ip6h;
struct vxlanhdr *vxh; struct vxlanhdr *vxh;
...@@ -1631,7 +1634,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, ...@@ -1631,7 +1634,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
skb->encapsulation = 1; skb->encapsulation = 1;
} }
skb_scrub_packet(skb, false); skb_scrub_packet(skb, xnet);
min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
+ VXLAN_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN + sizeof(struct ipv6hdr)
...@@ -1711,7 +1714,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, ...@@ -1711,7 +1714,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
int vxlan_xmit_skb(struct vxlan_sock *vs, int vxlan_xmit_skb(struct vxlan_sock *vs,
struct rtable *rt, struct sk_buff *skb, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
__be16 src_port, __be16 dst_port, __be32 vni) __be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
{ {
struct vxlanhdr *vxh; struct vxlanhdr *vxh;
struct udphdr *uh; struct udphdr *uh;
...@@ -1760,7 +1763,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, ...@@ -1760,7 +1763,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
return err; return err;
return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP, return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
tos, ttl, df, false); tos, ttl, df, xnet);
} }
EXPORT_SYMBOL_GPL(vxlan_xmit_skb); EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
...@@ -1853,7 +1856,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1853,7 +1856,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
fl4.daddr = dst->sin.sin_addr.s_addr; fl4.daddr = dst->sin.sin_addr.s_addr;
fl4.saddr = vxlan->saddr.sin.sin_addr.s_addr; fl4.saddr = vxlan->saddr.sin.sin_addr.s_addr;
rt = ip_route_output_key(dev_net(dev), &fl4); rt = ip_route_output_key(vxlan->net, &fl4);
if (IS_ERR(rt)) { if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n", netdev_dbg(dev, "no route to %pI4\n",
&dst->sin.sin_addr.s_addr); &dst->sin.sin_addr.s_addr);
...@@ -1874,7 +1877,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1874,7 +1877,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_dev *dst_vxlan; struct vxlan_dev *dst_vxlan;
ip_rt_put(rt); ip_rt_put(rt);
dst_vxlan = vxlan_find_vni(dev_net(dev), vni, dst_port); dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_port);
if (!dst_vxlan) if (!dst_vxlan)
goto tx_error; goto tx_error;
vxlan_encap_bypass(skb, vxlan, dst_vxlan); vxlan_encap_bypass(skb, vxlan, dst_vxlan);
...@@ -1887,7 +1890,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1887,7 +1890,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb, err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
fl4.saddr, dst->sin.sin_addr.s_addr, fl4.saddr, dst->sin.sin_addr.s_addr,
tos, ttl, df, src_port, dst_port, tos, ttl, df, src_port, dst_port,
htonl(vni << 8)); htonl(vni << 8),
!net_eq(vxlan->net, dev_net(vxlan->dev)));
if (err < 0) if (err < 0)
goto rt_tx_error; goto rt_tx_error;
...@@ -1927,7 +1931,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1927,7 +1931,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_dev *dst_vxlan; struct vxlan_dev *dst_vxlan;
dst_release(ndst); dst_release(ndst);
dst_vxlan = vxlan_find_vni(dev_net(dev), vni, dst_port); dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_port);
if (!dst_vxlan) if (!dst_vxlan)
goto tx_error; goto tx_error;
vxlan_encap_bypass(skb, vxlan, dst_vxlan); vxlan_encap_bypass(skb, vxlan, dst_vxlan);
...@@ -1938,7 +1942,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1938,7 +1942,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb, err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb,
dev, &fl6.saddr, &fl6.daddr, 0, ttl, dev, &fl6.saddr, &fl6.daddr, 0, ttl,
src_port, dst_port, htonl(vni << 8)); src_port, dst_port, htonl(vni << 8),
!net_eq(vxlan->net, dev_net(vxlan->dev)));
#endif #endif
} }
...@@ -2082,7 +2087,7 @@ static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan) ...@@ -2082,7 +2087,7 @@ static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
static int vxlan_init(struct net_device *dev) static int vxlan_init(struct net_device *dev)
{ {
struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
struct vxlan_sock *vs; struct vxlan_sock *vs;
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
...@@ -2090,7 +2095,7 @@ static int vxlan_init(struct net_device *dev) ...@@ -2090,7 +2095,7 @@ static int vxlan_init(struct net_device *dev)
return -ENOMEM; return -ENOMEM;
spin_lock(&vn->sock_lock); spin_lock(&vn->sock_lock);
vs = vxlan_find_sock(dev_net(dev), vxlan->dst_port); vs = vxlan_find_sock(vxlan->net, vxlan->dst_port);
if (vs) { if (vs) {
/* If we have a socket with same port already, reuse it */ /* If we have a socket with same port already, reuse it */
atomic_inc(&vs->refcnt); atomic_inc(&vs->refcnt);
...@@ -2172,8 +2177,8 @@ static void vxlan_flush(struct vxlan_dev *vxlan) ...@@ -2172,8 +2177,8 @@ static void vxlan_flush(struct vxlan_dev *vxlan)
/* Cleanup timer and forwarding table on shutdown */ /* Cleanup timer and forwarding table on shutdown */
static int vxlan_stop(struct net_device *dev) static int vxlan_stop(struct net_device *dev)
{ {
struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
struct vxlan_sock *vs = vxlan->vn_sock; struct vxlan_sock *vs = vxlan->vn_sock;
if (vs && vxlan_addr_multicast(&vxlan->default_dst.remote_ip) && if (vs && vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
...@@ -2202,7 +2207,7 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu) ...@@ -2202,7 +2207,7 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
struct net_device *lowerdev; struct net_device *lowerdev;
int max_mtu; int max_mtu;
lowerdev = __dev_get_by_index(dev_net(dev), dst->remote_ifindex); lowerdev = __dev_get_by_index(vxlan->net, dst->remote_ifindex);
if (lowerdev == NULL) if (lowerdev == NULL)
return eth_change_mtu(dev, new_mtu); return eth_change_mtu(dev, new_mtu);
...@@ -2285,7 +2290,6 @@ static void vxlan_setup(struct net_device *dev) ...@@ -2285,7 +2290,6 @@ static void vxlan_setup(struct net_device *dev)
dev->tx_queue_len = 0; dev->tx_queue_len = 0;
dev->features |= NETIF_F_LLTX; dev->features |= NETIF_F_LLTX;
dev->features |= NETIF_F_NETNS_LOCAL;
dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
dev->features |= NETIF_F_RXCSUM; dev->features |= NETIF_F_RXCSUM;
dev->features |= NETIF_F_GSO_SOFTWARE; dev->features |= NETIF_F_GSO_SOFTWARE;
...@@ -2578,7 +2582,7 @@ EXPORT_SYMBOL_GPL(vxlan_sock_add); ...@@ -2578,7 +2582,7 @@ EXPORT_SYMBOL_GPL(vxlan_sock_add);
static void vxlan_sock_work(struct work_struct *work) static void vxlan_sock_work(struct work_struct *work)
{ {
struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, sock_work); struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, sock_work);
struct net *net = dev_net(vxlan->dev); struct net *net = vxlan->net;
struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_net *vn = net_generic(net, vxlan_net_id);
__be16 port = vxlan->dst_port; __be16 port = vxlan->dst_port;
struct vxlan_sock *nvs; struct vxlan_sock *nvs;
...@@ -2605,6 +2609,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, ...@@ -2605,6 +2609,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
if (!data[IFLA_VXLAN_ID]) if (!data[IFLA_VXLAN_ID])
return -EINVAL; return -EINVAL;
vxlan->net = dev_net(dev);
vni = nla_get_u32(data[IFLA_VXLAN_ID]); vni = nla_get_u32(data[IFLA_VXLAN_ID]);
dst->remote_vni = vni; dst->remote_vni = vni;
...@@ -2739,8 +2745,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, ...@@ -2739,8 +2745,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
static void vxlan_dellink(struct net_device *dev, struct list_head *head) static void vxlan_dellink(struct net_device *dev, struct list_head *head)
{ {
struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
spin_lock(&vn->sock_lock); spin_lock(&vn->sock_lock);
if (!hlist_unhashed(&vxlan->hlist)) if (!hlist_unhashed(&vxlan->hlist))
...@@ -2905,8 +2911,33 @@ static __net_init int vxlan_init_net(struct net *net) ...@@ -2905,8 +2911,33 @@ static __net_init int vxlan_init_net(struct net *net)
return 0; return 0;
} }
/*
 * Per-netns exit hook: destroy every vxlan tunnel tied to @net.
 *
 * A tunnel is tied to @net in one of two ways:
 *   - its net_device currently lives in @net, or
 *   - @net is its packet i/o netns (vxlan->net), i.e. it sits on this
 *     namespace's vxlan_list, while the net_device itself has been moved
 *     to another namespace.
 *
 * All matching devices are collected on a local list under the RTNL lock
 * and unregistered in one batch.
 */
static void __net_exit vxlan_exit_net(struct net *net)
{
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
	struct vxlan_dev *vxlan, *next;
	struct net_device *dev, *aux;
	LIST_HEAD(list);

	rtnl_lock();

	/* Tunnels whose net_device is located in the exiting netns. */
	for_each_netdev_safe(net, dev, aux) {
		if (dev->rtnl_link_ops == &vxlan_link_ops)
			unregister_netdevice_queue(dev, &list);
	}

	/* Tunnels doing their packet i/o in the exiting netns. */
	list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
		/* If vxlan->dev is in the same netns, it has already been added
		 * to the list by the previous loop.
		 *
		 * Queue vxlan->dev here, not 'dev': 'dev' is only the cursor
		 * left over from the loop above and no longer refers to a
		 * valid net_device.
		 */
		if (!net_eq(dev_net(vxlan->dev), net))
			unregister_netdevice_queue(vxlan->dev, &list);
	}

	unregister_netdevice_many(&list);
	rtnl_unlock();
}
static struct pernet_operations vxlan_net_ops = { static struct pernet_operations vxlan_net_ops = {
.init = vxlan_init_net, .init = vxlan_init_net,
.exit = vxlan_exit_net,
.id = &vxlan_net_id, .id = &vxlan_net_id,
.size = sizeof(struct vxlan_net), .size = sizeof(struct vxlan_net),
}; };
......
...@@ -33,7 +33,7 @@ void vxlan_sock_release(struct vxlan_sock *vs); ...@@ -33,7 +33,7 @@ void vxlan_sock_release(struct vxlan_sock *vs);
int vxlan_xmit_skb(struct vxlan_sock *vs, int vxlan_xmit_skb(struct vxlan_sock *vs,
struct rtable *rt, struct sk_buff *skb, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
__be16 src_port, __be16 dst_port, __be32 vni); __be16 src_port, __be16 dst_port, __be32 vni, bool xnet);
__be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb); __be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb);
......
...@@ -180,7 +180,8 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) ...@@ -180,7 +180,8 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
OVS_CB(skb)->tun_key->ipv4_tos, OVS_CB(skb)->tun_key->ipv4_tos,
OVS_CB(skb)->tun_key->ipv4_ttl, df, OVS_CB(skb)->tun_key->ipv4_ttl, df,
src_port, dst_port, src_port, dst_port,
htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8)); htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8),
false);
if (err < 0) if (err < 0)
ip_rt_put(rt); ip_rt_put(rt);
error: error:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment