Commit 627caa9d authored by David S. Miller's avatar David S. Miller

Merge branch 'bpf-flow-labels'

Daniel Borkmann says:

====================
BPF support for flow labels

This set adds support for tunnel key flow labels for vxlan
and geneve devices in collect meta data mode and eBPF support
for managing these. For details please see individual patches.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents e327f4e1 4018ab18
......@@ -68,6 +68,7 @@ struct geneve_dev {
u8 tos; /* TOS override */
union geneve_addr remote; /* IP address for link partner */
struct list_head next; /* geneve's per namespace list */
__be32 label; /* IPv6 flowlabel override */
__be16 dst_port;
bool collect_md;
struct gro_cells gro_cells;
......@@ -846,6 +847,7 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
fl6->daddr = info->key.u.ipv6.dst;
fl6->saddr = info->key.u.ipv6.src;
fl6->flowi6_tos = RT_TOS(info->key.tos);
fl6->flowlabel = info->key.label;
dst_cache = &info->dst_cache;
} else {
prio = geneve->tos;
......@@ -857,6 +859,7 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
}
fl6->flowi6_tos = RT_TOS(prio);
fl6->flowlabel = geneve->label;
fl6->daddr = geneve->remote.sin6.sin6_addr;
dst_cache = &geneve->dst_cache;
}
......@@ -998,6 +1001,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
struct flowi6 fl6;
__u8 prio, ttl;
__be16 sport;
__be32 label;
bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
u32 flags = geneve->flags;
......@@ -1041,6 +1045,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
prio = ip_tunnel_ecn_encap(key->tos, iip, skb);
ttl = key->ttl;
label = info->key.label;
} else {
err = geneve6_build_skb(dst, skb, 0, geneve->vni,
0, NULL, flags, xnet);
......@@ -1052,9 +1057,11 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (!ttl && ipv6_addr_is_multicast(&fl6.daddr))
ttl = 1;
ttl = ttl ? : ip6_dst_hoplimit(dst);
label = geneve->label;
}
udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
&fl6.saddr, &fl6.daddr, prio, ttl,
&fl6.saddr, &fl6.daddr, prio, ttl, label,
sport, geneve->dst_port,
!!(flags & GENEVE_F_UDP_ZERO_CSUM6_TX));
return NETDEV_TX_OK;
......@@ -1238,6 +1245,7 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
[IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) },
[IFLA_GENEVE_TTL] = { .type = NLA_U8 },
[IFLA_GENEVE_TOS] = { .type = NLA_U8 },
[IFLA_GENEVE_LABEL] = { .type = NLA_U32 },
[IFLA_GENEVE_PORT] = { .type = NLA_U16 },
[IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG },
[IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 },
......@@ -1295,8 +1303,8 @@ static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
static int geneve_configure(struct net *net, struct net_device *dev,
union geneve_addr *remote,
__u32 vni, __u8 ttl, __u8 tos, __be16 dst_port,
bool metadata, u32 flags)
__u32 vni, __u8 ttl, __u8 tos, __be32 label,
__be16 dst_port, bool metadata, u32 flags)
{
struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_dev *t, *geneve = netdev_priv(dev);
......@@ -1306,7 +1314,7 @@ static int geneve_configure(struct net *net, struct net_device *dev,
if (!remote)
return -EINVAL;
if (metadata &&
(remote->sa.sa_family != AF_UNSPEC || vni || tos || ttl))
(remote->sa.sa_family != AF_UNSPEC || vni || tos || ttl || label))
return -EINVAL;
geneve->net = net;
......@@ -1321,10 +1329,14 @@ static int geneve_configure(struct net *net, struct net_device *dev,
(remote->sa.sa_family == AF_INET6 &&
ipv6_addr_is_multicast(&remote->sin6.sin6_addr)))
return -EINVAL;
if (label && remote->sa.sa_family != AF_INET6)
return -EINVAL;
geneve->remote = *remote;
geneve->ttl = ttl;
geneve->tos = tos;
geneve->label = label;
geneve->dst_port = dst_port;
geneve->collect_md = metadata;
geneve->flags = flags;
......@@ -1367,6 +1379,7 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
__u8 ttl = 0, tos = 0;
bool metadata = false;
union geneve_addr remote = geneve_remote_unspec;
__be32 label = 0;
__u32 vni = 0;
u32 flags = 0;
......@@ -1403,6 +1416,10 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
if (data[IFLA_GENEVE_TOS])
tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
if (data[IFLA_GENEVE_LABEL])
label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
IPV6_FLOWLABEL_MASK;
if (data[IFLA_GENEVE_PORT])
dst_port = nla_get_be16(data[IFLA_GENEVE_PORT]);
......@@ -1421,8 +1438,8 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
flags |= GENEVE_F_UDP_ZERO_CSUM6_RX;
return geneve_configure(net, dev, &remote, vni, ttl, tos, dst_port,
metadata, flags);
return geneve_configure(net, dev, &remote, vni, ttl, tos, label,
dst_port, metadata, flags);
}
static void geneve_dellink(struct net_device *dev, struct list_head *head)
......@@ -1439,6 +1456,7 @@ static size_t geneve_get_size(const struct net_device *dev)
nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */
nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */
nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */
nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */
......@@ -1469,7 +1487,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
}
if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl) ||
nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos))
nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos) ||
nla_put_be32(skb, IFLA_GENEVE_LABEL, geneve->label))
goto nla_put_failure;
if (nla_put_be16(skb, IFLA_GENEVE_PORT, geneve->dst_port))
......@@ -1521,7 +1540,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
return dev;
err = geneve_configure(net, dev, &geneve_remote_unspec,
0, 0, 0, htons(dst_port), true,
0, 0, 0, 0, htons(dst_port), true,
GENEVE_F_UDP_ZERO_CSUM6_RX);
if (err)
goto err;
......
......@@ -1790,6 +1790,7 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
struct sk_buff *skb, int oif, u8 tos,
__be32 label,
const struct in6_addr *daddr,
struct in6_addr *saddr,
struct dst_cache *dst_cache,
......@@ -1813,6 +1814,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
fl6.flowi6_tos = RT_TOS(tos);
fl6.daddr = *daddr;
fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr;
fl6.flowlabel = label;
fl6.flowi6_mark = skb->mark;
fl6.flowi6_proto = IPPROTO_UDP;
......@@ -1888,7 +1890,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
__be16 src_port = 0, dst_port;
__be32 vni;
__be32 vni, label;
__be16 df = 0;
__u8 tos, ttl;
int err;
......@@ -1939,12 +1941,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (tos == 1)
tos = ip_tunnel_get_dsfield(old_iph, skb);
label = vxlan->cfg.label;
src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
vxlan->cfg.port_max, true);
if (info) {
ttl = info->key.ttl;
tos = info->key.tos;
label = info->key.label;
udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
if (info->options_len)
......@@ -2020,7 +2024,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
ndst = vxlan6_get_route(vxlan, skb,
rdst ? rdst->remote_ifindex : 0, tos,
&dst->sin6.sin6_addr, &saddr,
label, &dst->sin6.sin6_addr, &saddr,
dst_cache, info);
if (IS_ERR(ndst)) {
netdev_dbg(dev, "no route to %pI6\n",
......@@ -2067,7 +2071,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
}
udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
&saddr, &dst->sin6.sin6_addr, tos, ttl,
src_port, dst_port, !udp_sum);
label, src_port, dst_port, !udp_sum);
#endif
}
......@@ -2390,7 +2394,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
if (!vxlan->vn6_sock)
return -EINVAL;
ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos,
&info->key.u.ipv6.dst,
info->key.label, &info->key.u.ipv6.dst,
&info->key.u.ipv6.src, NULL, info);
if (IS_ERR(ndst))
return PTR_ERR(ndst);
......@@ -2505,6 +2509,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_LOCAL6] = { .len = sizeof(struct in6_addr) },
[IFLA_VXLAN_TOS] = { .type = NLA_U8 },
[IFLA_VXLAN_TTL] = { .type = NLA_U8 },
[IFLA_VXLAN_LABEL] = { .type = NLA_U32 },
[IFLA_VXLAN_LEARNING] = { .type = NLA_U8 },
[IFLA_VXLAN_AGEING] = { .type = NLA_U32 },
[IFLA_VXLAN_LIMIT] = { .type = NLA_U32 },
......@@ -2739,6 +2744,11 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
vxlan->flags |= VXLAN_F_IPV6;
}
if (conf->label && !use_ipv6) {
pr_info("label only supported in use with IPv6\n");
return -EINVAL;
}
if (conf->remote_ifindex) {
lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex);
dst->remote_ifindex = conf->remote_ifindex;
......@@ -2887,6 +2897,10 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
if (data[IFLA_VXLAN_TTL])
conf.ttl = nla_get_u8(data[IFLA_VXLAN_TTL]);
if (data[IFLA_VXLAN_LABEL])
conf.label = nla_get_be32(data[IFLA_VXLAN_LABEL]) &
IPV6_FLOWLABEL_MASK;
if (!data[IFLA_VXLAN_LEARNING] || nla_get_u8(data[IFLA_VXLAN_LEARNING]))
conf.flags |= VXLAN_F_LEARN;
......@@ -2990,6 +3004,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_LOCAL{6} */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */
nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */
......@@ -3053,6 +3068,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) ||
nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
nla_put_u8(skb, IFLA_VXLAN_LEARNING,
!!(vxlan->flags & VXLAN_F_LEARN)) ||
nla_put_u8(skb, IFLA_VXLAN_PROXY,
......
......@@ -126,7 +126,7 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb,
ip_tunnel_key_init(&tun_dst->u.tun_info.key,
iph->saddr, iph->daddr, iph->tos, iph->ttl,
0, 0, tunnel_id, flags);
0, 0, 0, tunnel_id, flags);
return tun_dst;
}
......@@ -152,8 +152,11 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb,
info->key.u.ipv6.src = ip6h->saddr;
info->key.u.ipv6.dst = ip6h->daddr;
info->key.tos = ipv6_get_dsfield(ip6h);
info->key.ttl = ip6h->hop_limit;
info->key.label = ip6_flowlabel(ip6h);
return tun_dst;
}
......
......@@ -48,6 +48,7 @@ struct ip_tunnel_key {
__be16 tun_flags;
u8 tos; /* TOS for IPv4, TC for IPv6 */
u8 ttl; /* TTL for IPv4, HL for IPv6 */
__be32 label; /* Flow Label for IPv6 */
__be16 tp_src;
__be16 tp_dst;
};
......@@ -181,7 +182,7 @@ int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op,
static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
__be32 saddr, __be32 daddr,
u8 tos, u8 ttl,
u8 tos, u8 ttl, __be32 label,
__be16 tp_src, __be16 tp_dst,
__be64 tun_id, __be16 tun_flags)
{
......@@ -192,6 +193,7 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
0, IP_TUNNEL_KEY_IPV4_PAD_LEN);
key->tos = tos;
key->ttl = ttl;
key->label = label;
key->tun_flags = tun_flags;
/* For the tunnel types on the top of IPsec, the tp_src and tp_dst of
......
......@@ -88,8 +88,8 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb,
struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr,
__u8 prio, __u8 ttl, __be16 src_port,
__be16 dst_port, bool nocheck);
__u8 prio, __u8 ttl, __be32 label,
__be16 src_port, __be16 dst_port, bool nocheck);
#endif
void udp_tunnel_sock_release(struct socket *sock);
......
......@@ -162,6 +162,7 @@ struct vxlan_config {
u16 port_max;
u8 tos;
u8 ttl;
__be32 label;
u32 flags;
unsigned long age_interval;
unsigned int addrmax;
......
......@@ -375,6 +375,7 @@ struct bpf_tunnel_key {
};
__u8 tunnel_tos;
__u8 tunnel_ttl;
__u32 tunnel_label;
};
#endif /* _UAPI__LINUX_BPF_H__ */
......@@ -456,6 +456,7 @@ enum {
IFLA_VXLAN_GBP,
IFLA_VXLAN_REMCSUM_NOPARTIAL,
IFLA_VXLAN_COLLECT_METADATA,
IFLA_VXLAN_LABEL,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
......@@ -478,6 +479,7 @@ enum {
IFLA_GENEVE_UDP_CSUM,
IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
IFLA_GENEVE_LABEL,
__IFLA_GENEVE_MAX
};
#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)
......
......@@ -1770,12 +1770,15 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
return -EPROTO;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) {
case offsetof(struct bpf_tunnel_key, tunnel_label):
goto set_compat;
case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
/* Fixup deprecated structure layouts here, so we have
* a common path later on.
*/
if (ip_tunnel_info_af(info) != AF_INET)
return -EINVAL;
set_compat:
to = (struct bpf_tunnel_key *)compat;
break;
default:
......@@ -1787,11 +1790,13 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
to->tunnel_tos = info->key.tos;
to->tunnel_ttl = info->key.ttl;
if (flags & BPF_F_TUNINFO_IPV6)
if (flags & BPF_F_TUNINFO_IPV6) {
memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
sizeof(to->remote_ipv6));
else
to->tunnel_label = be32_to_cpu(info->key.label);
} else {
to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
}
if (unlikely(size != sizeof(struct bpf_tunnel_key)))
memcpy((void *)(long) r2, to, size);
......@@ -1850,6 +1855,7 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
return -EINVAL;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) {
case offsetof(struct bpf_tunnel_key, tunnel_label):
case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
/* Fixup deprecated structure layouts here, so we have
* a common path later on.
......@@ -1862,6 +1868,8 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
return -EINVAL;
}
}
if (unlikely(!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label))
return -EINVAL;
skb_dst_drop(skb);
dst_hold((struct dst_entry *) md);
......@@ -1882,6 +1890,8 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
info->mode |= IP_TUNNEL_INFO_IPV6;
memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
sizeof(from->remote_ipv6));
info->key.label = cpu_to_be32(from->tunnel_label) &
IPV6_FLOWLABEL_MASK;
} else {
info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
if (flags & BPF_F_ZERO_CSUM_TX)
......
......@@ -73,8 +73,8 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb,
struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr,
__u8 prio, __u8 ttl, __be16 src_port,
__be16 dst_port, bool nocheck)
__u8 prio, __u8 ttl, __be32 label,
__be16 src_port, __be16 dst_port, bool nocheck)
{
struct udphdr *uh;
struct ipv6hdr *ip6h;
......@@ -98,7 +98,7 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
__skb_push(skb, sizeof(*ip6h));
skb_reset_network_header(skb);
ip6h = ipv6_hdr(skb);
ip6_flow_hdr(ip6h, prio, htonl(0));
ip6_flow_hdr(ip6h, prio, label);
ip6h->payload_len = htons(skb->len);
ip6h->nexthdr = IPPROTO_UDP;
ip6h->hop_limit = ttl;
......
......@@ -196,7 +196,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
ttl = ip6_dst_hoplimit(ndst);
err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb,
ndst->dev, &src->ipv6,
&dst->ipv6, 0, ttl, src->udp_port,
&dst->ipv6, 0, ttl, 0, src->udp_port,
dst->udp_port, false);
#endif
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment