Commit a3a48de5 authored by Ido Schimmel, committed by David S. Miller

vxlan: mdb: Add MDB control path support

Implement MDB control path support, enabling the creation, deletion,
replacement and dumping of MDB entries in a similar fashion to the
bridge driver. Unlike the bridge driver, each entry stores a list of
remote VTEPs to which matched packets need to be replicated and not a
list of bridge ports.

The motivating use case is the installation of MDB entries by a user
space control plane in response to received EVPN routes. As such, only
allow permanent MDB entries to be installed and do not implement
snooping functionality, avoiding a lot of unnecessary complexity.

Since entries can only be modified by user space under RTNL, use RTNL as
the write lock. Use RCU to ensure that MDB entries and remotes are not
freed while being accessed from the data path during transmission.

In terms of uAPI, reuse the existing MDB netlink interface, but add a
few new attributes to request and response messages:

* IP address of the destination VXLAN tunnel endpoint where the
  multicast receivers reside.

* UDP destination port number to use to connect to the remote VXLAN
  tunnel endpoint.

* VXLAN Network Identifier (VNI) to use to connect to the remote VXLAN
  tunnel endpoint. Required when Ingress Replication (IR) is used and
  the remote VTEP is not a member of the originating broadcast domain
  (VLAN/VNI) [1].

* Source VXLAN Network Identifier (VNI) the MDB entry belongs to. Used
  only when the VXLAN device is in external mode.

* Interface index of the outgoing interface to reach the remote VXLAN
  tunnel endpoint. This is required when the underlay destination IP is
  multicast (P2MP), as the multicast routing tables are not consulted.

All the new attributes are added under the 'MDBA_SET_ENTRY_ATTRS' nest
which is strictly validated by the bridge driver, thereby automatically
rejecting the new attributes.

[1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast#section-3.2.2

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 6ab271aa
......@@ -4,4 +4,4 @@
obj-$(CONFIG_VXLAN) += vxlan.o
vxlan-objs := vxlan_core.o vxlan_multicast.o vxlan_vnifilter.o
vxlan-objs := vxlan_core.o vxlan_multicast.o vxlan_vnifilter.o vxlan_mdb.o
......@@ -2878,8 +2878,14 @@ static int vxlan_init(struct net_device *dev)
if (err)
goto err_free_percpu;
err = vxlan_mdb_init(vxlan);
if (err)
goto err_gro_cells_destroy;
return 0;
err_gro_cells_destroy:
gro_cells_destroy(&vxlan->gro_cells);
err_free_percpu:
free_percpu(dev->tstats);
err_vnigroup_uninit:
......@@ -2904,6 +2910,8 @@ static void vxlan_uninit(struct net_device *dev)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
vxlan_mdb_fini(vxlan);
if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
vxlan_vnigroup_uninit(vxlan);
......
This diff is collapsed.
......@@ -110,6 +110,14 @@ static inline int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
}
/* Report whether @ip holds a multicast address.
 *
 * The address family is read from ip->sa.sa_family: AF_INET6 addresses are
 * checked with ipv6_addr_is_multicast(), every other family is handled as
 * IPv4 and checked with ipv4_is_multicast().
 */
static inline bool vxlan_addr_is_multicast(const union vxlan_addr *ip)
{
	return ip->sa.sa_family == AF_INET6 ?
	       ipv6_addr_is_multicast(&ip->sin6.sin6_addr) :
	       ipv4_is_multicast(ip->sin.sin_addr.s_addr);
}
#else /* !CONFIG_IPV6 */
static inline
......@@ -138,8 +146,21 @@ static inline int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
}
static inline bool vxlan_addr_is_multicast(const union vxlan_addr *ip)
{
return ipv4_is_multicast(ip->sin.sin_addr.s_addr);
}
#endif
/* Return the size in bytes of the raw address stored in @ip.
 *
 * AF_INET6 yields sizeof(struct in6_addr); any other address family is
 * treated as IPv4 and yields sizeof(__be32).
 */
static inline size_t vxlan_addr_size(const union vxlan_addr *ip)
{
	return ip->sa.sa_family == AF_INET6 ? sizeof(struct in6_addr) :
					      sizeof(__be32);
}
static inline struct vxlan_vni_node *
vxlan_vnifilter_lookup(struct vxlan_dev *vxlan, __be32 vni)
{
......@@ -206,4 +227,14 @@ int vxlan_igmp_join(struct vxlan_dev *vxlan, union vxlan_addr *rip,
int rifindex);
int vxlan_igmp_leave(struct vxlan_dev *vxlan, union vxlan_addr *rip,
int rifindex);
/* vxlan_mdb.c
 *
 * MDB control path entry points: dumping, adding (including replacing) and
 * deleting MDB entries on a VXLAN device, plus per-device setup/teardown.
 */

/* Dump the MDB entries of @dev into @skb; @cb is the netlink dump callback
 * state. NOTE(review): return convention presumably 0 / negative errno -
 * confirm against vxlan_mdb.c.
 */
int vxlan_mdb_dump(struct net_device *dev, struct sk_buff *skb,
struct netlink_callback *cb);
/* Add an MDB entry to @dev from the parsed netlink attributes in @tb.
 * @nlmsg_flags carries the netlink message flags of the request and errors
 * are reported through @extack.
 */
int vxlan_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags,
struct netlink_ext_ack *extack);
/* Delete the MDB entry described by the parsed netlink attributes in @tb
 * from @dev; errors are reported through @extack.
 */
int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack);
/* Set up the MDB state of @vxlan. Called from vxlan_init(), which unwinds
 * its earlier setup when a non-zero value is returned.
 */
int vxlan_mdb_init(struct vxlan_dev *vxlan);
/* Tear down the MDB state of @vxlan. Called first thing in vxlan_uninit(). */
void vxlan_mdb_fini(struct vxlan_dev *vxlan);
#endif
......@@ -3,6 +3,7 @@
#define __NET_VXLAN_H 1
#include <linux/if_vlan.h>
#include <linux/rhashtable-types.h>
#include <net/udp_tunnel.h>
#include <net/dst_metadata.h>
#include <net/rtnetlink.h>
......@@ -302,6 +303,10 @@ struct vxlan_dev {
struct vxlan_vni_group __rcu *vnigrp;
struct hlist_head fdb_head[FDB_HASH_SIZE];
struct rhashtable mdb_tbl;
struct hlist_head mdb_list;
unsigned int mdb_seq;
};
#define VXLAN_F_LEARN 0x01
......
......@@ -633,6 +633,11 @@ enum {
MDBA_MDB_EATTR_GROUP_MODE,
MDBA_MDB_EATTR_SOURCE,
MDBA_MDB_EATTR_RTPROT,
MDBA_MDB_EATTR_DST,
MDBA_MDB_EATTR_DST_PORT,
MDBA_MDB_EATTR_VNI,
MDBA_MDB_EATTR_IFINDEX,
MDBA_MDB_EATTR_SRC_VNI,
__MDBA_MDB_EATTR_MAX
};
#define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1)
......@@ -728,6 +733,11 @@ enum {
MDBE_ATTR_SRC_LIST,
MDBE_ATTR_GROUP_MODE,
MDBE_ATTR_RTPROT,
MDBE_ATTR_DST,
MDBE_ATTR_DST_PORT,
MDBE_ATTR_VNI,
MDBE_ATTR_IFINDEX,
MDBE_ATTR_SRC_VNI,
__MDBE_ATTR_MAX,
};
#define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment