Commit 2e62fa69 authored by David S. Miller

Merge branch 'vxlan_group_policy_extension'

Thomas Graf says:

====================
VXLAN Group Policy Extension

Implements support for the Group Policy VXLAN extension [0] to provide
a lightweight and simple security label mechanism across network peers
based on VXLAN. The security context and associated metadata are mapped
to/from skb->mark. This allows further mapping to a SELinux context
using SECMARK, to implement ACLs directly with nftables, iptables, OVS,
tc, etc.

The extension is disabled by default and should be run on a distinct
port in mixed Linux VXLAN VTEP environments. Liberal VXLAN VTEPs
which ignore unknown reserved bits will be able to receive VXLAN-GBP
frames.

Simple usage example:

10.1.1.1:
   # ip link add vxlan0 type vxlan id 10 remote 10.1.1.2 gbp
   # iptables -I OUTPUT -m owner --uid-owner 101 -j MARK --set-mark 0x200

10.1.1.2:
   # ip link add vxlan0 type vxlan id 10 remote 10.1.1.1 gbp
   # iptables -I INPUT -m mark --mark 0x200 -j DROP

iproute2 [1] and OVS [2] support will be provided in separate patches.

[0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy
[1] https://github.com/tgraf/iproute2/tree/vxlan-gbp
[2] https://github.com/tgraf/ovs/tree/vxlan-gbp
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 3f3558bb 1dd144cf
...@@ -263,15 +263,19 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb) ...@@ -263,15 +263,19 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
return list_first_entry(&fdb->remotes, struct vxlan_rdst, list); return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
} }
/* Find VXLAN socket based on network namespace, address family and UDP port */ /* Find VXLAN socket based on network namespace, address family and UDP port
static struct vxlan_sock *vxlan_find_sock(struct net *net, * and enabled unshareable flags.
sa_family_t family, __be16 port) */
static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
__be16 port, u32 flags)
{ {
struct vxlan_sock *vs; struct vxlan_sock *vs;
u32 match_flags = flags & VXLAN_F_UNSHAREABLE;
hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) { hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
if (inet_sk(vs->sock->sk)->inet_sport == port && if (inet_sk(vs->sock->sk)->inet_sport == port &&
inet_sk(vs->sock->sk)->sk.sk_family == family) inet_sk(vs->sock->sk)->sk.sk_family == family &&
(vs->flags & VXLAN_F_UNSHAREABLE) == match_flags)
return vs; return vs;
} }
return NULL; return NULL;
...@@ -291,11 +295,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id) ...@@ -291,11 +295,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
/* Look up VNI in a per net namespace table */ /* Look up VNI in a per net namespace table */
static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id,
sa_family_t family, __be16 port) sa_family_t family, __be16 port,
u32 flags)
{ {
struct vxlan_sock *vs; struct vxlan_sock *vs;
vs = vxlan_find_sock(net, family, port); vs = vxlan_find_sock(net, family, port, flags);
if (!vs) if (!vs)
return NULL; return NULL;
...@@ -620,7 +625,8 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, ...@@ -620,7 +625,8 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
continue; continue;
vh2 = (struct vxlanhdr *)(p->data + off_vx); vh2 = (struct vxlanhdr *)(p->data + off_vx);
if (vh->vx_vni != vh2->vx_vni) { if (vh->vx_flags != vh2->vx_flags ||
vh->vx_vni != vh2->vx_vni) {
NAPI_GRO_CB(p)->same_flow = 0; NAPI_GRO_CB(p)->same_flow = 0;
continue; continue;
} }
...@@ -1183,6 +1189,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) ...@@ -1183,6 +1189,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
struct vxlan_sock *vs; struct vxlan_sock *vs;
struct vxlanhdr *vxh; struct vxlanhdr *vxh;
u32 flags, vni; u32 flags, vni;
struct vxlan_metadata md = {0};
/* Need Vxlan and inner Ethernet header to be present */ /* Need Vxlan and inner Ethernet header to be present */
if (!pskb_may_pull(skb, VXLAN_HLEN)) if (!pskb_may_pull(skb, VXLAN_HLEN))
...@@ -1216,6 +1223,24 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) ...@@ -1216,6 +1223,24 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
vni &= VXLAN_VID_MASK; vni &= VXLAN_VID_MASK;
} }
/* For backwards compatibility, only allow reserved fields to be
* used by VXLAN extensions if explicitly requested.
*/
if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) {
struct vxlanhdr_gbp *gbp;
gbp = (struct vxlanhdr_gbp *)vxh;
md.gbp = ntohs(gbp->policy_id);
if (gbp->dont_learn)
md.gbp |= VXLAN_GBP_DONT_LEARN;
if (gbp->policy_applied)
md.gbp |= VXLAN_GBP_POLICY_APPLIED;
flags &= ~VXLAN_GBP_USED_BITS;
}
if (flags || (vni & ~VXLAN_VID_MASK)) { if (flags || (vni & ~VXLAN_VID_MASK)) {
/* If there are any unprocessed flags remaining treat /* If there are any unprocessed flags remaining treat
* this as a malformed packet. This behavior diverges from * this as a malformed packet. This behavior diverges from
...@@ -1229,7 +1254,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) ...@@ -1229,7 +1254,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
goto bad_flags; goto bad_flags;
} }
vs->rcv(vs, skb, vxh->vx_vni); md.vni = vxh->vx_vni;
vs->rcv(vs, skb, &md);
return 0; return 0;
drop: drop:
...@@ -1246,8 +1272,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) ...@@ -1246,8 +1272,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
return 1; return 1;
} }
static void vxlan_rcv(struct vxlan_sock *vs, static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
struct sk_buff *skb, __be32 vx_vni) struct vxlan_metadata *md)
{ {
struct iphdr *oip = NULL; struct iphdr *oip = NULL;
struct ipv6hdr *oip6 = NULL; struct ipv6hdr *oip6 = NULL;
...@@ -1258,7 +1284,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, ...@@ -1258,7 +1284,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
int err = 0; int err = 0;
union vxlan_addr *remote_ip; union vxlan_addr *remote_ip;
vni = ntohl(vx_vni) >> 8; vni = ntohl(md->vni) >> 8;
/* Is this VNI defined? */ /* Is this VNI defined? */
vxlan = vxlan_vs_find_vni(vs, vni); vxlan = vxlan_vs_find_vni(vs, vni);
if (!vxlan) if (!vxlan)
...@@ -1292,6 +1318,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, ...@@ -1292,6 +1318,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
goto drop; goto drop;
skb_reset_network_header(skb); skb_reset_network_header(skb);
skb->mark = md->gbp;
if (oip6) if (oip6)
err = IP6_ECN_decapsulate(oip6, skb); err = IP6_ECN_decapsulate(oip6, skb);
...@@ -1641,13 +1668,30 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) ...@@ -1641,13 +1668,30 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
return false; return false;
} }
/* Encode Group Based Policy metadata into an outgoing VXLAN header.
 *
 * @vxh: VXLAN header to update; it is also accessed through the
 *       struct vxlanhdr_gbp view of the same bytes.
 * @vs:  VXLAN socket (not referenced by this function).
 * @md:  metadata whose ->gbp value supplies the flag bits and policy ID.
 *
 * Sets the G flag in the header flags word, raises the "don't learn" and
 * "policy applied" bits when the corresponding VXLAN_GBP_* bits are present
 * in md->gbp, and stores the low VXLAN_GBP_ID_MASK bits of md->gbp as the
 * policy ID in network byte order. Bits are only ever set here, never
 * cleared.
 */
static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs,
				struct vxlan_metadata *md)
{
	struct vxlanhdr_gbp *gbp_hdr = (struct vxlanhdr_gbp *)vxh;

	/* Advertise that the header carries the GBP extension. */
	vxh->vx_flags |= htonl(VXLAN_HF_GBP);

	if (md->gbp & VXLAN_GBP_POLICY_APPLIED)
		gbp_hdr->policy_applied = 1;

	if (md->gbp & VXLAN_GBP_DONT_LEARN)
		gbp_hdr->dont_learn = 1;

	gbp_hdr->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
}
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
static int vxlan6_xmit_skb(struct vxlan_sock *vs, static int vxlan6_xmit_skb(struct vxlan_sock *vs,
struct dst_entry *dst, struct sk_buff *skb, struct dst_entry *dst, struct sk_buff *skb,
struct net_device *dev, struct in6_addr *saddr, struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr, __u8 prio, __u8 ttl, struct in6_addr *daddr, __u8 prio, __u8 ttl,
__be16 src_port, __be16 dst_port, __be32 vni, __be16 src_port, __be16 dst_port,
bool xnet) struct vxlan_metadata *md, bool xnet)
{ {
struct vxlanhdr *vxh; struct vxlanhdr *vxh;
int min_headroom; int min_headroom;
...@@ -1696,7 +1740,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, ...@@ -1696,7 +1740,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = htonl(VXLAN_HF_VNI); vxh->vx_flags = htonl(VXLAN_HF_VNI);
vxh->vx_vni = vni; vxh->vx_vni = md->vni;
if (type & SKB_GSO_TUNNEL_REMCSUM) { if (type & SKB_GSO_TUNNEL_REMCSUM) {
u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
...@@ -1714,6 +1758,9 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, ...@@ -1714,6 +1758,9 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
} }
} }
if (vs->flags & VXLAN_F_GBP)
vxlan_build_gbp_hdr(vxh, vs, md);
skb_set_inner_protocol(skb, htons(ETH_P_TEB)); skb_set_inner_protocol(skb, htons(ETH_P_TEB));
udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio, udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio,
...@@ -1728,7 +1775,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, ...@@ -1728,7 +1775,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
int vxlan_xmit_skb(struct vxlan_sock *vs, int vxlan_xmit_skb(struct vxlan_sock *vs,
struct rtable *rt, struct sk_buff *skb, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
__be16 src_port, __be16 dst_port, __be32 vni, bool xnet) __be16 src_port, __be16 dst_port,
struct vxlan_metadata *md, bool xnet)
{ {
struct vxlanhdr *vxh; struct vxlanhdr *vxh;
int min_headroom; int min_headroom;
...@@ -1771,7 +1819,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, ...@@ -1771,7 +1819,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = htonl(VXLAN_HF_VNI); vxh->vx_flags = htonl(VXLAN_HF_VNI);
vxh->vx_vni = vni; vxh->vx_vni = md->vni;
if (type & SKB_GSO_TUNNEL_REMCSUM) { if (type & SKB_GSO_TUNNEL_REMCSUM) {
u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
...@@ -1789,6 +1837,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, ...@@ -1789,6 +1837,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
} }
} }
if (vs->flags & VXLAN_F_GBP)
vxlan_build_gbp_hdr(vxh, vs, md);
skb_set_inner_protocol(skb, htons(ETH_P_TEB)); skb_set_inner_protocol(skb, htons(ETH_P_TEB));
return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos, return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos,
...@@ -1849,6 +1900,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1849,6 +1900,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *old_iph; const struct iphdr *old_iph;
struct flowi4 fl4; struct flowi4 fl4;
union vxlan_addr *dst; union vxlan_addr *dst;
struct vxlan_metadata md;
__be16 src_port = 0, dst_port; __be16 src_port = 0, dst_port;
u32 vni; u32 vni;
__be16 df = 0; __be16 df = 0;
...@@ -1910,7 +1962,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1910,7 +1962,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
ip_rt_put(rt); ip_rt_put(rt);
dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_vxlan = vxlan_find_vni(vxlan->net, vni,
dst->sa.sa_family, dst_port); dst->sa.sa_family, dst_port,
vxlan->flags);
if (!dst_vxlan) if (!dst_vxlan)
goto tx_error; goto tx_error;
vxlan_encap_bypass(skb, vxlan, dst_vxlan); vxlan_encap_bypass(skb, vxlan, dst_vxlan);
...@@ -1919,11 +1972,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1919,11 +1972,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
tos = ip_tunnel_ecn_encap(tos, old_iph, skb); tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
md.vni = htonl(vni << 8);
md.gbp = skb->mark;
err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb, err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
fl4.saddr, dst->sin.sin_addr.s_addr, fl4.saddr, dst->sin.sin_addr.s_addr,
tos, ttl, df, src_port, dst_port, tos, ttl, df, src_port, dst_port, &md,
htonl(vni << 8),
!net_eq(vxlan->net, dev_net(vxlan->dev))); !net_eq(vxlan->net, dev_net(vxlan->dev)));
if (err < 0) { if (err < 0) {
/* skb is already freed. */ /* skb is already freed. */
...@@ -1968,7 +2022,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1968,7 +2022,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
dst_release(ndst); dst_release(ndst);
dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_vxlan = vxlan_find_vni(vxlan->net, vni,
dst->sa.sa_family, dst_port); dst->sa.sa_family, dst_port,
vxlan->flags);
if (!dst_vxlan) if (!dst_vxlan)
goto tx_error; goto tx_error;
vxlan_encap_bypass(skb, vxlan, dst_vxlan); vxlan_encap_bypass(skb, vxlan, dst_vxlan);
...@@ -1976,10 +2031,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ...@@ -1976,10 +2031,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
} }
ttl = ttl ? : ip6_dst_hoplimit(ndst); ttl = ttl ? : ip6_dst_hoplimit(ndst);
md.vni = htonl(vni << 8);
md.gbp = skb->mark;
err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb, err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb,
dev, &fl6.saddr, &fl6.daddr, 0, ttl, dev, &fl6.saddr, &fl6.daddr, 0, ttl,
src_port, dst_port, htonl(vni << 8), src_port, dst_port, &md,
!net_eq(vxlan->net, dev_net(vxlan->dev))); !net_eq(vxlan->net, dev_net(vxlan->dev)));
#endif #endif
} }
...@@ -2136,7 +2193,7 @@ static int vxlan_init(struct net_device *dev) ...@@ -2136,7 +2193,7 @@ static int vxlan_init(struct net_device *dev)
spin_lock(&vn->sock_lock); spin_lock(&vn->sock_lock);
vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET, vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
vxlan->dst_port); vxlan->dst_port, vxlan->flags);
if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) { if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) {
/* If we have a socket with same port already, reuse it */ /* If we have a socket with same port already, reuse it */
vxlan_vs_add_dev(vs, vxlan); vxlan_vs_add_dev(vs, vxlan);
...@@ -2382,6 +2439,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { ...@@ -2382,6 +2439,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, [IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
[IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 }, [IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 },
[IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 }, [IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 },
[IFLA_VXLAN_GBP] = { .type = NLA_FLAG, },
}; };
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
...@@ -2542,7 +2600,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, ...@@ -2542,7 +2600,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
return vs; return vs;
spin_lock(&vn->sock_lock); spin_lock(&vn->sock_lock);
vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port); vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port, flags);
if (vs && ((vs->rcv != rcv) || if (vs && ((vs->rcv != rcv) ||
!atomic_add_unless(&vs->refcnt, 1, 0))) !atomic_add_unless(&vs->refcnt, 1, 0)))
vs = ERR_PTR(-EBUSY); vs = ERR_PTR(-EBUSY);
...@@ -2706,8 +2764,11 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, ...@@ -2706,8 +2764,11 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX])) nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
vxlan->flags |= VXLAN_F_REMCSUM_RX; vxlan->flags |= VXLAN_F_REMCSUM_RX;
if (data[IFLA_VXLAN_GBP])
vxlan->flags |= VXLAN_F_GBP;
if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET, if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET,
vxlan->dst_port)) { vxlan->dst_port, vxlan->flags)) {
pr_info("duplicate VNI %u\n", vni); pr_info("duplicate VNI %u\n", vni);
return -EEXIST; return -EEXIST;
} }
...@@ -2851,6 +2912,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) ...@@ -2851,6 +2912,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports)) if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
goto nla_put_failure; goto nla_put_failure;
if (vxlan->flags & VXLAN_F_GBP &&
nla_put_flag(skb, IFLA_VXLAN_GBP))
goto nla_put_failure;
return 0; return 0;
nla_put_failure: nla_put_failure:
......
...@@ -97,7 +97,10 @@ struct ip_tunnel { ...@@ -97,7 +97,10 @@ struct ip_tunnel {
#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100) #define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100)
#define TUNNEL_OAM __cpu_to_be16(0x0200) #define TUNNEL_OAM __cpu_to_be16(0x0200)
#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400) #define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400)
#define TUNNEL_OPTIONS_PRESENT __cpu_to_be16(0x0800) #define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800)
#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)
#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
struct tnl_ptk_info { struct tnl_ptk_info {
__be16 flags; __be16 flags;
......
...@@ -11,15 +11,76 @@ ...@@ -11,15 +11,76 @@
#define VNI_HASH_BITS 10 #define VNI_HASH_BITS 10
#define VNI_HASH_SIZE (1<<VNI_HASH_BITS) #define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
/* VXLAN protocol header */ /*
* VXLAN Group Based Policy Extension:
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* |1|-|-|-|1|-|-|-|R|D|R|R|A|R|R|R| Group Policy ID |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | VXLAN Network Identifier (VNI) | Reserved |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
* D = Don't Learn bit. When set, this bit indicates that the egress
* VTEP MUST NOT learn the source address of the encapsulated frame.
*
* A = Indicates that the group policy has already been applied to
* this packet. Policies MUST NOT be applied by devices when the
* A bit is set.
*
* [0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy
*/
/* View of the VXLAN header used when the Group Based Policy extension is
 * in use. The receive path and vxlan_build_gbp_hdr() obtain it by casting
 * a struct vxlanhdr pointer.
 */
struct vxlanhdr_gbp {
/* First byte of the header flags word. */
__u8 vx_flags;
/* Second header byte: carries the D (dont_learn) and A (policy_applied)
 * bits; all other bits are reserved. The declaration order of the
 * bitfields differs between the two bitfield endianness variants.
 */
#ifdef __LITTLE_ENDIAN_BITFIELD
__u8 reserved_flags1:3,
policy_applied:1,
reserved_flags2:2,
dont_learn:1,
reserved_flags3:1;
#elif defined(__BIG_ENDIAN_BITFIELD)
__u8 reserved_flags1:1,
dont_learn:1,
reserved_flags2:2,
policy_applied:1,
reserved_flags3:3;
#else
#error "Please fix <asm/byteorder.h>"
#endif
/* 16-bit Group Policy ID, big endian (converted with ntohs()/htons()). */
__be16 policy_id;
/* Second header word; same type as struct vxlanhdr.vx_vni. */
__be32 vx_vni;
};
/* Bits of the host-order flags word that GBP parsing accounts for: the G
 * flag plus the low 24 bits (the D/A flag byte and the policy ID). The
 * receive path clears them from its local flags after extracting the GBP
 * metadata, before the "unprocessed flags" check.
 */
#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | 0xFFFFFF)
/* skb->mark mapping
*
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
#define VXLAN_GBP_DONT_LEARN (BIT(6) << 16)
#define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16)
#define VXLAN_GBP_ID_MASK (0xFFFF)
/* VXLAN protocol header:
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* |G|R|R|R|I|R|R|C| Reserved |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | VXLAN Network Identifier (VNI) | Reserved |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
* G = 1 Group Policy (VXLAN-GBP)
* I = 1 VXLAN Network Identifier (VNI) present
* C = 1 Remote checksum offload (RCO)
*/
struct vxlanhdr { struct vxlanhdr {
__be32 vx_flags; __be32 vx_flags;
__be32 vx_vni; __be32 vx_vni;
}; };
/* VXLAN header flags. */ /* VXLAN header flags. */
/* NOTE(review): the earlier definition of VXLAN_HF_RCO was 0x00200000
 * (bit 21), whereas BIT(24) is 0x01000000. VXLAN_HF_VNI keeps its value
 * (0x08000000 == BIT(27)), so only the RCO flag changes on the wire as
 * part of the switch to BIT() notation - confirm that this is intended.
 */
#define VXLAN_HF_VNI 0x08000000 #define VXLAN_HF_RCO BIT(24)
#define VXLAN_HF_RCO 0x00200000 #define VXLAN_HF_VNI BIT(27)
#define VXLAN_HF_GBP BIT(31)
/* Remote checksum offload header option */ /* Remote checksum offload header option */
#define VXLAN_RCO_MASK 0x7f /* Last byte of vni field */ #define VXLAN_RCO_MASK 0x7f /* Last byte of vni field */
...@@ -32,8 +93,14 @@ struct vxlanhdr { ...@@ -32,8 +93,14 @@ struct vxlanhdr {
#define VXLAN_VID_MASK (VXLAN_N_VID - 1) #define VXLAN_VID_MASK (VXLAN_N_VID - 1)
#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
/* Per-packet metadata passed between the VXLAN core and its receive
 * callback / transmit helpers in place of a bare VNI argument.
 */
struct vxlan_metadata {
/* VNI in header layout: big endian with the VNI in the upper 24 bits
 * (built as htonl(vni << 8), read back as ntohl(vni) >> 8).
 */
__be32 vni;
/* Group Based Policy value; copied from skb->mark on transmit and
 * written to skb->mark on receive.
 */
u32 gbp;
};
struct vxlan_sock; struct vxlan_sock;
typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key); typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb,
struct vxlan_metadata *md);
/* per UDP socket information */ /* per UDP socket information */
struct vxlan_sock { struct vxlan_sock {
...@@ -60,6 +127,10 @@ struct vxlan_sock { ...@@ -60,6 +127,10 @@ struct vxlan_sock {
#define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100 #define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100
#define VXLAN_F_REMCSUM_TX 0x200 #define VXLAN_F_REMCSUM_TX 0x200
#define VXLAN_F_REMCSUM_RX 0x400 #define VXLAN_F_REMCSUM_RX 0x400
#define VXLAN_F_GBP 0x800
/* These flags must match in order for a socket to be shareable */
#define VXLAN_F_UNSHAREABLE VXLAN_F_GBP
struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
vxlan_rcv_t *rcv, void *data, vxlan_rcv_t *rcv, void *data,
...@@ -70,7 +141,8 @@ void vxlan_sock_release(struct vxlan_sock *vs); ...@@ -70,7 +141,8 @@ void vxlan_sock_release(struct vxlan_sock *vs);
int vxlan_xmit_skb(struct vxlan_sock *vs, int vxlan_xmit_skb(struct vxlan_sock *vs,
struct rtable *rt, struct sk_buff *skb, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
__be16 src_port, __be16 dst_port, __be32 vni, bool xnet); __be16 src_port, __be16 dst_port, struct vxlan_metadata *md,
bool xnet);
static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
netdev_features_t features) netdev_features_t features)
......
...@@ -372,6 +372,7 @@ enum { ...@@ -372,6 +372,7 @@ enum {
IFLA_VXLAN_UDP_ZERO_CSUM6_RX, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
IFLA_VXLAN_REMCSUM_TX, IFLA_VXLAN_REMCSUM_TX,
IFLA_VXLAN_REMCSUM_RX, IFLA_VXLAN_REMCSUM_RX,
IFLA_VXLAN_GBP,
__IFLA_VXLAN_MAX __IFLA_VXLAN_MAX
}; };
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
......
...@@ -252,11 +252,21 @@ enum ovs_vport_attr { ...@@ -252,11 +252,21 @@ enum ovs_vport_attr {
#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1) #define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)
/* VXLAN extension attribute types (OVS_VXLAN_EXT_*). */
enum {
OVS_VXLAN_EXT_UNSPEC,
OVS_VXLAN_EXT_GBP, /* Flag or __u32 */
__OVS_VXLAN_EXT_MAX,
};
/* Highest valid OVS_VXLAN_EXT_* attribute type. */
#define OVS_VXLAN_EXT_MAX (__OVS_VXLAN_EXT_MAX - 1)
/* OVS_VPORT_ATTR_OPTIONS attributes for tunnels. /* OVS_VPORT_ATTR_OPTIONS attributes for tunnels.
*/ */
enum { enum {
OVS_TUNNEL_ATTR_UNSPEC, OVS_TUNNEL_ATTR_UNSPEC,
OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */ OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */
OVS_TUNNEL_ATTR_EXTENSION,
__OVS_TUNNEL_ATTR_MAX __OVS_TUNNEL_ATTR_MAX
}; };
...@@ -328,6 +338,7 @@ enum ovs_tunnel_key_attr { ...@@ -328,6 +338,7 @@ enum ovs_tunnel_key_attr {
OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */ OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */
OVS_TUNNEL_KEY_ATTR_TP_SRC, /* be16 src Transport Port. */ OVS_TUNNEL_KEY_ATTR_TP_SRC, /* be16 src Transport Port. */
OVS_TUNNEL_KEY_ATTR_TP_DST, /* be16 dst Transport Port. */ OVS_TUNNEL_KEY_ATTR_TP_DST, /* be16 dst Transport Port. */
OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS, /* Nested OVS_VXLAN_EXT_* */
__OVS_TUNNEL_KEY_ATTR_MAX __OVS_TUNNEL_KEY_ATTR_MAX
}; };
......
...@@ -691,7 +691,7 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, ...@@ -691,7 +691,7 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) * BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
8)) - 1 8)) - 1
> sizeof(key->tun_opts)); > sizeof(key->tun_opts));
memcpy(GENEVE_OPTS(key, tun_info->options_len), memcpy(TUN_METADATA_OPTS(key, tun_info->options_len),
tun_info->options, tun_info->options_len); tun_info->options, tun_info->options_len);
key->tun_opts_len = tun_info->options_len; key->tun_opts_len = tun_info->options_len;
} else { } else {
......
...@@ -53,7 +53,7 @@ struct ovs_key_ipv4_tunnel { ...@@ -53,7 +53,7 @@ struct ovs_key_ipv4_tunnel {
struct ovs_tunnel_info { struct ovs_tunnel_info {
struct ovs_key_ipv4_tunnel tunnel; struct ovs_key_ipv4_tunnel tunnel;
const struct geneve_opt *options; const void *options;
u8 options_len; u8 options_len;
}; };
...@@ -61,10 +61,10 @@ struct ovs_tunnel_info { ...@@ -61,10 +61,10 @@ struct ovs_tunnel_info {
* maximum size. This allows us to get the benefits of variable length * maximum size. This allows us to get the benefits of variable length
* matching for small options. * matching for small options.
*/ */
#define GENEVE_OPTS(flow_key, opt_len) \ #define TUN_METADATA_OFFSET(opt_len) \
((struct geneve_opt *)((flow_key)->tun_opts + \ (FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len)
FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \ #define TUN_METADATA_OPTS(flow_key, opt_len) \
opt_len)) ((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len)))
static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
__be32 saddr, __be32 daddr, __be32 saddr, __be32 daddr,
...@@ -73,7 +73,7 @@ static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, ...@@ -73,7 +73,7 @@ static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
__be16 tp_dst, __be16 tp_dst,
__be64 tun_id, __be64 tun_id,
__be16 tun_flags, __be16 tun_flags,
const struct geneve_opt *opts, const void *opts,
u8 opts_len) u8 opts_len)
{ {
tun_info->tunnel.tun_id = tun_id; tun_info->tunnel.tun_id = tun_id;
...@@ -105,7 +105,7 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, ...@@ -105,7 +105,7 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
__be16 tp_dst, __be16 tp_dst,
__be64 tun_id, __be64 tun_id,
__be16 tun_flags, __be16 tun_flags,
const struct geneve_opt *opts, const void *opts,
u8 opts_len) u8 opts_len)
{ {
__ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr, __ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr,
......
...@@ -49,6 +49,14 @@ ...@@ -49,6 +49,14 @@
#include <net/mpls.h> #include <net/mpls.h>
#include "flow_netlink.h" #include "flow_netlink.h"
#include "vport-vxlan.h"
/* One entry of an attribute length table, indexed by attribute type. */
struct ovs_len_tbl {
/* Expected payload length in bytes, or OVS_ATTR_NESTED. */
int len;
/* Length table describing the attributes nested inside this one, if any
 * (e.g. OVS_KEY_ATTR_TUNNEL points at ovs_tunnel_key_lens).
 */
const struct ovs_len_tbl *next;
};
/* Marker length for attributes whose payload is not checked against a
 * fixed size.
 */
#define OVS_ATTR_NESTED -1
static void update_range(struct sw_flow_match *match, static void update_range(struct sw_flow_match *match,
size_t offset, size_t size, bool is_mask) size_t offset, size_t size, bool is_mask)
...@@ -261,6 +269,9 @@ size_t ovs_tun_key_attr_size(void) ...@@ -261,6 +269,9 @@ size_t ovs_tun_key_attr_size(void)
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
+ nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
/* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with
* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
*/
+ nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
+ nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */
} }
...@@ -289,29 +300,45 @@ size_t ovs_key_attr_size(void) ...@@ -289,29 +300,45 @@ size_t ovs_key_attr_size(void)
+ nla_total_size(28); /* OVS_KEY_ATTR_ND */ + nla_total_size(28); /* OVS_KEY_ATTR_ND */
} }
/* Expected payload length of each OVS_TUNNEL_KEY_ATTR_* attribute.
 * Entries with .len = 0 carry no payload; OVS_ATTR_NESTED marks the
 * Geneve and VXLAN option attributes, whose size is not fixed here.
 * Referenced as the .next table of OVS_KEY_ATTR_TUNNEL in ovs_key_lens.
 */
static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
[OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) },
[OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) },
[OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = sizeof(u32) },
[OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 },
[OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 },
[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
[OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 },
[OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) },
[OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) },
[OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 },
[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED },
[OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED },
};
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_ENCAP] = -1, [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED },
[OVS_KEY_ATTR_PRIORITY] = sizeof(u32), [OVS_KEY_ATTR_PRIORITY] = { .len = sizeof(u32) },
[OVS_KEY_ATTR_IN_PORT] = sizeof(u32), [OVS_KEY_ATTR_IN_PORT] = { .len = sizeof(u32) },
[OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), [OVS_KEY_ATTR_SKB_MARK] = { .len = sizeof(u32) },
[OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) },
[OVS_KEY_ATTR_VLAN] = sizeof(__be16), [OVS_KEY_ATTR_VLAN] = { .len = sizeof(__be16) },
[OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
[OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) },
[OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) },
[OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) },
[OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16), [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
[OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) },
[OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) },
[OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) },
[OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) },
[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) },
[OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) },
[OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32), [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
[OVS_KEY_ATTR_DP_HASH] = sizeof(u32), [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) },
[OVS_KEY_ATTR_TUNNEL] = -1, [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED,
[OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls), .next = ovs_tunnel_key_lens, },
[OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) },
}; };
static bool is_all_zero(const u8 *fp, size_t size) static bool is_all_zero(const u8 *fp, size_t size)
...@@ -352,8 +379,8 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, ...@@ -352,8 +379,8 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
return -EINVAL; return -EINVAL;
} }
expected_len = ovs_key_lens[type]; expected_len = ovs_key_lens[type].len;
if (nla_len(nla) != expected_len && expected_len != -1) { if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) {
OVS_NLERR(log, "Key %d has unexpected len %d expected %d", OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
type, nla_len(nla), expected_len); type, nla_len(nla), expected_len);
return -EINVAL; return -EINVAL;
...@@ -432,13 +459,47 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a, ...@@ -432,13 +459,47 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a,
SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
} }
opt_key_offset = (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0, opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
nla_len(a));
SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a), SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
nla_len(a), is_mask); nla_len(a), is_mask);
return 0; return 0;
} }
/* Netlink policy used when parsing the attributes nested inside a VXLAN
 * tunnel options attribute: the GBP extension must carry a 32-bit value.
 * Indexed by OVS_VXLAN_EXT_* attribute type.
 */
static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = {
[OVS_VXLAN_EXT_GBP] = { .type = NLA_U32 },
};
static int vxlan_tun_opt_from_nlattr(const struct nlattr *a,
struct sw_flow_match *match, bool is_mask,
bool log)
{
struct nlattr *tb[OVS_VXLAN_EXT_MAX+1];
unsigned long opt_key_offset;
struct ovs_vxlan_opts opts;
int err;
BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy);
if (err < 0)
return err;
memset(&opts, 0, sizeof(opts));
if (tb[OVS_VXLAN_EXT_GBP])
opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]);
if (!is_mask)
SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
else
SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
is_mask);
return 0;
}
static int ipv4_tun_from_nlattr(const struct nlattr *attr, static int ipv4_tun_from_nlattr(const struct nlattr *attr,
struct sw_flow_match *match, bool is_mask, struct sw_flow_match *match, bool is_mask,
bool log) bool log)
...@@ -447,35 +508,22 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, ...@@ -447,35 +508,22 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
int rem; int rem;
bool ttl = false; bool ttl = false;
__be16 tun_flags = 0; __be16 tun_flags = 0;
int opts_type = 0;
nla_for_each_nested(a, attr, rem) { nla_for_each_nested(a, attr, rem) {
int type = nla_type(a); int type = nla_type(a);
int err; int err;
static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
[OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
[OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
[OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
[OVS_TUNNEL_KEY_ATTR_TOS] = 1,
[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
[OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16),
[OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16),
[OVS_TUNNEL_KEY_ATTR_OAM] = 0,
[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
};
if (type > OVS_TUNNEL_KEY_ATTR_MAX) { if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
OVS_NLERR(log, "Tunnel attr %d out of range max %d", OVS_NLERR(log, "Tunnel attr %d out of range max %d",
type, OVS_TUNNEL_KEY_ATTR_MAX); type, OVS_TUNNEL_KEY_ATTR_MAX);
return -EINVAL; return -EINVAL;
} }
if (ovs_tunnel_key_lens[type] != nla_len(a) && if (ovs_tunnel_key_lens[type].len != nla_len(a) &&
ovs_tunnel_key_lens[type] != -1) { ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) {
OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
type, nla_len(a), ovs_tunnel_key_lens[type]); type, nla_len(a), ovs_tunnel_key_lens[type].len);
return -EINVAL; return -EINVAL;
} }
...@@ -520,11 +568,30 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, ...@@ -520,11 +568,30 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
tun_flags |= TUNNEL_OAM; tun_flags |= TUNNEL_OAM;
break; break;
case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
if (opts_type) {
OVS_NLERR(log, "Multiple metadata blocks provided");
return -EINVAL;
}
err = genev_tun_opt_from_nlattr(a, match, is_mask, log); err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
if (err) if (err)
return err; return err;
tun_flags |= TUNNEL_OPTIONS_PRESENT; tun_flags |= TUNNEL_GENEVE_OPT;
opts_type = type;
break;
case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
if (opts_type) {
OVS_NLERR(log, "Multiple metadata blocks provided");
return -EINVAL;
}
err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
if (err)
return err;
tun_flags |= TUNNEL_VXLAN_OPT;
opts_type = type;
break; break;
default: default:
OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d",
...@@ -553,13 +620,29 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, ...@@ -553,13 +620,29 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
} }
} }
return opts_type;
}
/* Emit the VXLAN tunnel options into @skb as a nested
 * OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS attribute that contains the GBP value as
 * an OVS_VXLAN_EXT_GBP u32.
 *
 * @tun_opts is interpreted as a struct ovs_vxlan_opts; @swkey_tun_opts_len
 * is not referenced by this function.
 *
 * Returns 0 on success, or -EMSGSIZE if the nest could not be started or the
 * u32 could not be added.
 */
static int vxlan_opt_to_nlattr(struct sk_buff *skb,
const void *tun_opts, int swkey_tun_opts_len)
{
const struct ovs_vxlan_opts *opts = tun_opts;
struct nlattr *nla;
nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
if (!nla)
return -EMSGSIZE;
/* NOTE(review): the nest opened above is not cancelled on this error
 * path; presumably the caller discards or trims the message - confirm.
 */
if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
return -EMSGSIZE;
nla_nest_end(skb, nla);
return 0; return 0;
} }
static int __ipv4_tun_to_nlattr(struct sk_buff *skb, static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
const struct ovs_key_ipv4_tunnel *output, const struct ovs_key_ipv4_tunnel *output,
const struct geneve_opt *tun_opts, const void *tun_opts, int swkey_tun_opts_len)
int swkey_tun_opts_len)
{ {
if (output->tun_flags & TUNNEL_KEY && if (output->tun_flags & TUNNEL_KEY &&
nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
...@@ -590,18 +673,22 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, ...@@ -590,18 +673,22 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
if ((output->tun_flags & TUNNEL_OAM) && if ((output->tun_flags & TUNNEL_OAM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
return -EMSGSIZE; return -EMSGSIZE;
if (tun_opts && if (tun_opts) {
nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, if (output->tun_flags & TUNNEL_GENEVE_OPT &&
swkey_tun_opts_len, tun_opts)) nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
return -EMSGSIZE; swkey_tun_opts_len, tun_opts))
return -EMSGSIZE;
else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
return -EMSGSIZE;
}
return 0; return 0;
} }
static int ipv4_tun_to_nlattr(struct sk_buff *skb, static int ipv4_tun_to_nlattr(struct sk_buff *skb,
const struct ovs_key_ipv4_tunnel *output, const struct ovs_key_ipv4_tunnel *output,
const struct geneve_opt *tun_opts, const void *tun_opts, int swkey_tun_opts_len)
int swkey_tun_opts_len)
{ {
struct nlattr *nla; struct nlattr *nla;
int err; int err;
...@@ -675,7 +762,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, ...@@ -675,7 +762,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
} }
if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
is_mask, log)) is_mask, log) < 0)
return -EINVAL; return -EINVAL;
*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
} }
...@@ -915,18 +1002,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, ...@@ -915,18 +1002,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
return 0; return 0;
} }
static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key) static void nlattr_set(struct nlattr *attr, u8 val,
const struct ovs_len_tbl *tbl)
{ {
struct nlattr *nla; struct nlattr *nla;
int rem; int rem;
/* The nlattr stream should already have been validated */ /* The nlattr stream should already have been validated */
nla_for_each_nested(nla, attr, rem) { nla_for_each_nested(nla, attr, rem) {
/* We assume that ovs_key_lens[type] == -1 means that type is a if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
* nested attribute nlattr_set(nla, val, tbl[nla_type(nla)].next);
*/
if (is_attr_mask_key && ovs_key_lens[nla_type(nla)] == -1)
nlattr_set(nla, val, false);
else else
memset(nla_data(nla), val, nla_len(nla)); memset(nla_data(nla), val, nla_len(nla));
} }
...@@ -934,7 +1019,7 @@ static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key) ...@@ -934,7 +1019,7 @@ static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key)
static void mask_set_nlattr(struct nlattr *attr, u8 val) static void mask_set_nlattr(struct nlattr *attr, u8 val)
{ {
nlattr_set(attr, val, true); nlattr_set(attr, val, ovs_key_lens);
} }
/** /**
...@@ -1148,10 +1233,10 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, ...@@ -1148,10 +1233,10 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
goto nla_put_failure; goto nla_put_failure;
if ((swkey->tun_key.ipv4_dst || is_mask)) { if ((swkey->tun_key.ipv4_dst || is_mask)) {
const struct geneve_opt *opts = NULL; const void *opts = NULL;
if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
opts = GENEVE_OPTS(output, swkey->tun_opts_len); opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts, if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
swkey->tun_opts_len)) swkey->tun_opts_len))
...@@ -1540,6 +1625,34 @@ void ovs_match_init(struct sw_flow_match *match, ...@@ -1540,6 +1625,34 @@ void ovs_match_init(struct sw_flow_match *match,
} }
} }
/* Walk the Geneve options stored in the tunnel metadata of @key and check
 * that they are well formed.
 *
 * Each option is a struct geneve_opt header followed by option->length * 4
 * bytes.  The walk fails with -EINVAL if the remaining bytes cannot hold an
 * option header, or if an option claims more bytes than remain.
 *
 * If any option has GENEVE_CRIT_OPT_TYPE set in its type, TUNNEL_CRIT_OPT is
 * OR-ed into key->tun_key.tun_flags.
 *
 * Returns 0 on success or -EINVAL on a malformed option list.
 */
static int validate_geneve_opts(struct sw_flow_key *key)
{
	struct geneve_opt *opt;
	bool has_critical = false;
	int remaining;

	opt = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);

	for (remaining = key->tun_opts_len; remaining > 0; ) {
		int opt_len;

		/* Not even room for an option header. */
		if (remaining < sizeof(*opt))
			return -EINVAL;

		opt_len = sizeof(*opt) + opt->length * 4;
		if (opt_len > remaining)
			return -EINVAL;

		if (opt->type & GENEVE_CRIT_OPT_TYPE)
			has_critical = true;

		/* Step over this option to the next header. */
		opt = (struct geneve_opt *)((u8 *)opt + opt_len);
		remaining -= opt_len;
	}

	if (has_critical)
		key->tun_key.tun_flags |= TUNNEL_CRIT_OPT;

	return 0;
}
static int validate_and_copy_set_tun(const struct nlattr *attr, static int validate_and_copy_set_tun(const struct nlattr *attr,
struct sw_flow_actions **sfa, bool log) struct sw_flow_actions **sfa, bool log)
{ {
...@@ -1547,36 +1660,23 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, ...@@ -1547,36 +1660,23 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
struct sw_flow_key key; struct sw_flow_key key;
struct ovs_tunnel_info *tun_info; struct ovs_tunnel_info *tun_info;
struct nlattr *a; struct nlattr *a;
int err, start; int err, start, opts_type;
ovs_match_init(&match, &key, NULL); ovs_match_init(&match, &key, NULL);
err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
if (err) if (opts_type < 0)
return err; return opts_type;
if (key.tun_opts_len) { if (key.tun_opts_len) {
struct geneve_opt *option = GENEVE_OPTS(&key, switch (opts_type) {
key.tun_opts_len); case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
int opts_len = key.tun_opts_len; err = validate_geneve_opts(&key);
bool crit_opt = false; if (err < 0)
return err;
while (opts_len > 0) { break;
int len; case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
break;
if (opts_len < sizeof(*option)) }
return -EINVAL;
len = sizeof(*option) + option->length * 4;
if (len > opts_len)
return -EINVAL;
crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
option = (struct geneve_opt *)((u8 *)option + len);
opts_len -= len;
};
key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
}; };
start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
...@@ -1597,9 +1697,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, ...@@ -1597,9 +1697,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
* everything else will go away after flow setup. We can append * everything else will go away after flow setup. We can append
* it to tun_info and then point there. * it to tun_info and then point there.
*/ */
memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len), memcpy((tun_info + 1),
key.tun_opts_len); TUN_METADATA_OPTS(&key, key.tun_opts_len), key.tun_opts_len);
tun_info->options = (struct geneve_opt *)(tun_info + 1); tun_info->options = (tun_info + 1);
} else { } else {
tun_info->options = NULL; tun_info->options = NULL;
} }
...@@ -1622,8 +1722,8 @@ static int validate_set(const struct nlattr *a, ...@@ -1622,8 +1722,8 @@ static int validate_set(const struct nlattr *a,
return -EINVAL; return -EINVAL;
if (key_type > OVS_KEY_ATTR_MAX || if (key_type > OVS_KEY_ATTR_MAX ||
(ovs_key_lens[key_type] != nla_len(ovs_key) && (ovs_key_lens[key_type].len != nla_len(ovs_key) &&
ovs_key_lens[key_type] != -1)) ovs_key_lens[key_type].len != OVS_ATTR_NESTED))
return -EINVAL; return -EINVAL;
switch (key_type) { switch (key_type) {
......
...@@ -88,7 +88,7 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb) ...@@ -88,7 +88,7 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
opts_len = geneveh->opt_len * 4; opts_len = geneveh->opt_len * 4;
flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT | flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
(udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) |
(geneveh->oam ? TUNNEL_OAM : 0) | (geneveh->oam ? TUNNEL_OAM : 0) |
(geneveh->critical ? TUNNEL_CRIT_OPT : 0); (geneveh->critical ? TUNNEL_CRIT_OPT : 0);
...@@ -178,7 +178,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) ...@@ -178,7 +178,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
__be16 sport; __be16 sport;
struct rtable *rt; struct rtable *rt;
struct flowi4 fl; struct flowi4 fl;
u8 vni[3]; u8 vni[3], opts_len, *opts;
__be16 df; __be16 df;
int err; int err;
...@@ -200,11 +200,18 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) ...@@ -200,11 +200,18 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
tunnel_id_to_vni(tun_key->tun_id, vni); tunnel_id_to_vni(tun_key->tun_id, vni);
skb->ignore_df = 1; skb->ignore_df = 1;
if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) {
opts = (u8 *)tun_info->options;
opts_len = tun_info->options_len;
} else {
opts = NULL;
opts_len = 0;
}
err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr, err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr,
tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_dst, tun_key->ipv4_tos,
tun_key->ipv4_ttl, df, sport, dport, tun_key->ipv4_ttl, df, sport, dport,
tun_key->tun_flags, vni, tun_key->tun_flags, vni, opts_len, opts,
tun_info->options_len, (u8 *)tun_info->options,
false); false);
if (err < 0) if (err < 0)
ip_rt_put(rt); ip_rt_put(rt);
......
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
#include "datapath.h" #include "datapath.h"
#include "vport.h" #include "vport.h"
#include "vport-vxlan.h"
/** /**
* struct vxlan_port - Keeps track of open UDP ports * struct vxlan_port - Keeps track of open UDP ports
...@@ -49,6 +50,7 @@ ...@@ -49,6 +50,7 @@
struct vxlan_port { struct vxlan_port {
struct vxlan_sock *vs; struct vxlan_sock *vs;
char name[IFNAMSIZ]; char name[IFNAMSIZ];
u32 exts; /* VXLAN_F_* in <net/vxlan.h> */
}; };
static struct vport_ops ovs_vxlan_vport_ops; static struct vport_ops ovs_vxlan_vport_ops;
...@@ -59,19 +61,30 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport) ...@@ -59,19 +61,30 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
} }
/* Called with rcu_read_lock and BH disabled. */ /* Called with rcu_read_lock and BH disabled. */
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
struct vxlan_metadata *md)
{ {
struct ovs_tunnel_info tun_info; struct ovs_tunnel_info tun_info;
struct vxlan_port *vxlan_port;
struct vport *vport = vs->data; struct vport *vport = vs->data;
struct iphdr *iph; struct iphdr *iph;
struct ovs_vxlan_opts opts = {
.gbp = md->gbp,
};
__be64 key; __be64 key;
__be16 flags;
flags = TUNNEL_KEY;
vxlan_port = vxlan_vport(vport);
if (vxlan_port->exts & VXLAN_F_GBP)
flags |= TUNNEL_VXLAN_OPT;
/* Save outer tunnel values */ /* Save outer tunnel values */
iph = ip_hdr(skb); iph = ip_hdr(skb);
key = cpu_to_be64(ntohl(vx_vni) >> 8); key = cpu_to_be64(ntohl(md->vni) >> 8);
ovs_flow_tun_info_init(&tun_info, iph, ovs_flow_tun_info_init(&tun_info, iph,
udp_hdr(skb)->source, udp_hdr(skb)->dest, udp_hdr(skb)->source, udp_hdr(skb)->dest,
key, TUNNEL_KEY, NULL, 0); key, flags, &opts, sizeof(opts));
ovs_vport_receive(vport, skb, &tun_info); ovs_vport_receive(vport, skb, &tun_info);
} }
...@@ -83,6 +96,21 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) ...@@ -83,6 +96,21 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port))) if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
return -EMSGSIZE; return -EMSGSIZE;
if (vxlan_port->exts) {
struct nlattr *exts;
exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION);
if (!exts)
return -EMSGSIZE;
if (vxlan_port->exts & VXLAN_F_GBP &&
nla_put_flag(skb, OVS_VXLAN_EXT_GBP))
return -EMSGSIZE;
nla_nest_end(skb, exts);
}
return 0; return 0;
} }
...@@ -95,6 +123,31 @@ static void vxlan_tnl_destroy(struct vport *vport) ...@@ -95,6 +123,31 @@ static void vxlan_tnl_destroy(struct vport *vport)
ovs_vport_deferred_free(vport); ovs_vport_deferred_free(vport);
} }
/* Netlink policy for the nested vport extension attributes parsed by
 * vxlan_configure_exts(): GBP is a flag attribute (presence enables it).
 * Indexed by OVS_VXLAN_EXT_* attribute type.
 */
static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = {
[OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, },
};
/* Parse the nested extension attribute @attr and record the requested VXLAN
 * extensions in the vxlan_port private data of @vport.
 *
 * Currently only OVS_VXLAN_EXT_GBP is recognised; when present, VXLAN_F_GBP
 * is OR-ed into vxlan_port->exts.
 *
 * Returns 0 on success, -EINVAL if @attr's payload is shorter than a netlink
 * attribute header, or the negative error from nla_parse_nested().
 */
static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr)
{
	struct nlattr *tb[OVS_VXLAN_EXT_MAX + 1];
	struct vxlan_port *port;
	int ret;

	/* The payload must hold at least one nested attribute header. */
	if (nla_len(attr) < sizeof(struct nlattr))
		return -EINVAL;

	ret = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, attr, exts_policy);
	if (ret < 0)
		return ret;

	port = vxlan_vport(vport);

	if (tb[OVS_VXLAN_EXT_GBP])
		port->exts |= VXLAN_F_GBP;

	return 0;
}
static struct vport *vxlan_tnl_create(const struct vport_parms *parms) static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
{ {
struct net *net = ovs_dp_get_net(parms->dp); struct net *net = ovs_dp_get_net(parms->dp);
...@@ -127,7 +180,17 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) ...@@ -127,7 +180,17 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
vxlan_port = vxlan_vport(vport); vxlan_port = vxlan_vport(vport);
strncpy(vxlan_port->name, parms->name, IFNAMSIZ); strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0); a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION);
if (a) {
err = vxlan_configure_exts(vport, a);
if (err) {
ovs_vport_free(vport);
goto error;
}
}
vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true,
vxlan_port->exts);
if (IS_ERR(vs)) { if (IS_ERR(vs)) {
ovs_vport_free(vport); ovs_vport_free(vport);
return (void *)vs; return (void *)vs;
...@@ -140,12 +203,28 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) ...@@ -140,12 +203,28 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
return ERR_PTR(err); return ERR_PTR(err);
} }
/* Return the GBP value to transmit for @skb.
 *
 * The value is taken from the egress tunnel info attached to @skb, but only
 * when the tunnel flags contain TUNNEL_VXLAN_OPT and the options area is at
 * least as large as struct ovs_vxlan_opts.  In every other case 0 is
 * returned.
 */
static int vxlan_ext_gbp(struct sk_buff *skb)
{
	const struct ovs_tunnel_info *info = OVS_CB(skb)->egress_tun_info;
	const struct ovs_vxlan_opts *vxlan_opts = info->options;

	/* Options are only VXLAN options if flagged as such. */
	if (!(info->tunnel.tun_flags & TUNNEL_VXLAN_OPT))
		return 0;

	/* Guard against an options area too small to hold gbp. */
	if (info->options_len < sizeof(*vxlan_opts))
		return 0;

	return vxlan_opts->gbp;
}
static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
{ {
struct net *net = ovs_dp_get_net(vport->dp); struct net *net = ovs_dp_get_net(vport->dp);
struct vxlan_port *vxlan_port = vxlan_vport(vport); struct vxlan_port *vxlan_port = vxlan_vport(vport);
__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
const struct ovs_key_ipv4_tunnel *tun_key; const struct ovs_key_ipv4_tunnel *tun_key;
struct vxlan_metadata md = {0};
struct rtable *rt; struct rtable *rt;
struct flowi4 fl; struct flowi4 fl;
__be16 src_port; __be16 src_port;
...@@ -170,12 +249,14 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) ...@@ -170,12 +249,14 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
skb->ignore_df = 1; skb->ignore_df = 1;
src_port = udp_flow_src_port(net, skb, 0, 0, true); src_port = udp_flow_src_port(net, skb, 0, 0, true);
md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8);
md.gbp = vxlan_ext_gbp(skb);
err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
fl.saddr, tun_key->ipv4_dst, fl.saddr, tun_key->ipv4_dst,
tun_key->ipv4_tos, tun_key->ipv4_ttl, df, tun_key->ipv4_tos, tun_key->ipv4_ttl, df,
src_port, dst_port, src_port, dst_port,
htonl(be64_to_cpu(tun_key->tun_id) << 8), &md,
false); false);
if (err < 0) if (err < 0)
ip_rt_put(rt); ip_rt_put(rt);
......
#ifndef VPORT_VXLAN_H
#define VPORT_VXLAN_H 1
#include <linux/kernel.h>
#include <linux/types.h>
/* VXLAN-specific tunnel options as stored in the tunnel options area of a
 * flow key / tunnel info.
 */
struct ovs_vxlan_opts {
__u32 gbp;	/* VXLAN Group Policy (GBP) extension value */
};
#endif /* VPORT_VXLAN_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment