Commit d5a915b9 authored by David S. Miller

Merge branch 'gre-add-collect_md-mode-for-ERSPAN-tunnel'

William Tu says:

====================
gre: add collect_md mode for ERSPAN tunnel

This patch series provides collect_md mode for ERSPAN tunnel.  The first patch
refactors the existing gre_fb_xmit function by extracting the route cache
portion into a new function called prepare_fb_xmit.  The second patch
introduces the collect_md mode for ERSPAN tunnel, by calling the
prepare_fb_xmit function and adding ERSPAN specific logic.  The final patch
adds the test case using bpf_skb_{set,get}_tunnel_{key,opt}.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 03157937 ef88f89c
......@@ -154,8 +154,10 @@ struct ip_tunnel {
#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800)
#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)
#define TUNNEL_NOCACHE __cpu_to_be16(0x2000)
#define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000)
#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
#define TUNNEL_OPTIONS_PRESENT \
(TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)
struct tnl_ptk_info {
__be16 flags;
......
......@@ -113,6 +113,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
__be32 id, u32 index, bool truncate);
static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
......@@ -287,7 +289,33 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
false, false) < 0)
goto drop;
tunnel->index = ntohl(index);
if (tunnel->collect_md) {
struct ip_tunnel_info *info;
struct erspan_metadata *md;
__be64 tun_id;
__be16 flags;
tpi->flags |= TUNNEL_KEY;
flags = tpi->flags;
tun_id = key32_to_tunnel_id(tpi->key);
tun_dst = ip_tun_rx_dst(skb, flags,
tun_id, sizeof(*md));
if (!tun_dst)
return PACKET_REJECT;
md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
if (!md)
return PACKET_REJECT;
md->index = index;
info = &tun_dst->u.tun_info;
info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
info->options_len = sizeof(*md);
} else {
tunnel->index = ntohl(index);
}
skb_reset_mac_header(skb);
ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
return PACKET_RCVD;
......@@ -432,39 +460,33 @@ static struct rtable *gre_get_rt(struct sk_buff *skb,
return ip_route_output_key(net, fl);
}
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
struct net_device *dev,
struct flowi4 *fl,
int tunnel_hlen)
{
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct rtable *rt = NULL;
struct flowi4 fl;
int min_headroom;
int tunnel_hlen;
__be16 df, flags;
bool use_cache;
int err;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
ip_tunnel_info_af(tun_info) != AF_INET))
goto err_free_skb;
key = &tun_info->key;
use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
if (use_cache)
rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
if (!rt) {
rt = gre_get_rt(skb, dev, &fl, key);
rt = gre_get_rt(skb, dev, fl, key);
if (IS_ERR(rt))
goto err_free_skb;
goto err_free_skb;
if (use_cache)
dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
fl.saddr);
fl->saddr);
}
tunnel_hlen = gre_calc_hlen(key->tun_flags);
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ tunnel_hlen + sizeof(struct iphdr);
if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
......@@ -476,6 +498,37 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
if (unlikely(err))
goto err_free_rt;
}
return rt;
err_free_rt:
ip_rt_put(rt);
err_free_skb:
kfree_skb(skb);
dev->stats.tx_dropped++;
return NULL;
}
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct rtable *rt = NULL;
struct flowi4 fl;
int tunnel_hlen;
__be16 df, flags;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
ip_tunnel_info_af(tun_info) != AF_INET))
goto err_free_skb;
key = &tun_info->key;
tunnel_hlen = gre_calc_hlen(key->tun_flags);
rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
if (!rt)
return;
/* Push Tunnel header. */
if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
......@@ -498,6 +551,64 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
dev->stats.tx_dropped++;
}
/*
 * erspan_fb_xmit - transmit a packet on an ERSPAN device in collect_md mode.
 * @skb:   packet to encapsulate; it is either handed to iptunnel_xmit() or
 *         freed here (or by prepare_fb_xmit() on its own failure paths).
 * @dev:   the ERSPAN net_device; supplies the MTU, the output sequence
 *         counter and the tx_dropped statistic.
 * @proto: not used by this function.
 *
 * All per-packet tunnel parameters (destination, tos, ttl, key, ERSPAN
 * index) come from the tunnel metadata attached to @skb rather than from
 * the device configuration. Packets whose metadata is missing, is not
 * TX/IPv4 metadata, or does not carry a full struct erspan_metadata option
 * are dropped and accounted in dev->stats.tx_dropped.
 */
static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			   __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	struct rtable *rt = NULL;
	bool truncate = false;
	struct flowi4 fl;
	int tunnel_hlen;
	__be16 df;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	/*
	 * The ERSPAN index is read from the tunnel options below, so make
	 * sure a complete option was actually attached before trusting it.
	 * A non-NULL options pointer alone is not proof of that; the length
	 * recorded in the metadata is what tells us the bytes are valid.
	 * Doing this before the route lookup avoids wasted work on packets
	 * that will be dropped anyway.
	 */
	if (unlikely(tun_info->options_len < sizeof(*md)))
		goto err_free_skb;

	md = ip_tunnel_info_opts(tun_info);
	if (unlikely(!md))
		goto err_free_skb;

	key = &tun_info->key;

	/* ERSPAN has fixed 8 byte GRE header */
	tunnel_hlen = 8 + sizeof(struct erspanhdr);

	/* On failure prepare_fb_xmit() has already freed skb and counted it. */
	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	if (gre_handle_offloads(skb, false))
		goto err_free_rt;

	if (skb->len > dev->mtu) {
		/*
		 * Only flag the frame as truncated if the trim really
		 * happened; otherwise drop instead of sending an oversized
		 * packet with a misleading truncate bit.
		 */
		if (pskb_trim(skb, dev->mtu))
			goto err_free_rt;
		truncate = true;
	}

	erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
			    ntohl(md->index), truncate);

	/* GRE header carries only the sequence number for ERSPAN. */
	gre_build_header(skb, 8, TUNNEL_SEQ,
			 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}
static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
struct ip_tunnel_info *info = skb_tunnel_info(skb);
......@@ -611,6 +722,11 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
struct ip_tunnel *tunnel = netdev_priv(dev);
bool truncate = false;
if (tunnel->collect_md) {
erspan_fb_xmit(skb, dev, skb->protocol);
return NETDEV_TX_OK;
}
if (gre_handle_offloads(skb, false))
goto free_skb;
......@@ -973,9 +1089,12 @@ static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
return ret;
/* ERSPAN should only have GRE sequence and key flag */
flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
if (flags != (GRE_SEQ | GRE_KEY))
if (data[IFLA_GRE_OFLAGS])
flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
if (data[IFLA_GRE_IFLAGS])
flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
if (!data[IFLA_GRE_COLLECT_METADATA] &&
flags != (GRE_SEQ | GRE_KEY))
return -EINVAL;
/* ERSPAN Session ID only has 10-bit. Since we reuse
......
......@@ -17,6 +17,7 @@
#include <uapi/linux/pkt_cls.h>
#include <net/ipv6.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
#define _htonl __builtin_bswap32
#define ERROR(ret) do {\
......@@ -38,6 +39,10 @@ struct vxlan_metadata {
u32 gbp;
};
/* Tunnel option blob passed to bpf_skb_{set,get}_tunnel_opt() for ERSPAN. */
struct erspan_metadata {
/* ERSPAN index, kept in network byte order (hence __be32). */
__be32 index;
};
SEC("gre_set_tunnel")
int _gre_set_tunnel(struct __sk_buff *skb)
{
......@@ -76,6 +81,63 @@ int _gre_get_tunnel(struct __sk_buff *skb)
return TC_ACT_OK;
}
/*
 * Attach ERSPAN tunnel metadata to an outgoing packet.
 *
 * Sets the tunnel key (remote 172.16.1.100, tunnel id 2, ttl 64) and then an
 * ERSPAN option carrying index 123. Returns TC_ACT_OK on success and
 * TC_ACT_SHOT (after logging through ERROR()) if either helper fails.
 */
SEC("erspan_set_tunnel")
int _erspan_set_tunnel(struct __sk_buff *skb)
{
	struct bpf_tunnel_key key;
	struct erspan_metadata md;
	int ret;

	__builtin_memset(&key, 0x0, sizeof(key));
	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
	key.tunnel_id = 2;
	key.tunnel_tos = 0;
	key.tunnel_ttl = 64;

	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
	if (ret < 0) {
		ERROR(ret);
		return TC_ACT_SHOT;
	}

	/*
	 * The index travels in network byte order. Use the bpf_endian.h
	 * helper so this matches the bpf_ntohl() used when reading it back.
	 */
	md.index = bpf_htonl(123);
	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
	if (ret < 0) {
		ERROR(ret);
		return TC_ACT_SHOT;
	}

	return TC_ACT_OK;
}
/*
 * Read back the tunnel key and the ERSPAN option of a received packet and
 * print them to the trace pipe. Returns TC_ACT_SHOT if either lookup fails
 * (after logging via ERROR()), TC_ACT_OK otherwise.
 */
SEC("erspan_get_tunnel")
int _erspan_get_tunnel(struct __sk_buff *skb)
{
	char fmt[] = "key %d remote ip 0x%x erspan index 0x%x\n";
	struct erspan_metadata opt;
	struct bpf_tunnel_key tkey;
	int rc;

	rc = bpf_skb_get_tunnel_key(skb, &tkey, sizeof(tkey), 0);
	if (rc < 0) {
		ERROR(rc);
		return TC_ACT_SHOT;
	}

	rc = bpf_skb_get_tunnel_opt(skb, &opt, sizeof(opt));
	if (rc < 0) {
		ERROR(rc);
		return TC_ACT_SHOT;
	}

	/* The index is stored big-endian; convert for printing. */
	u32 erspan_idx = bpf_ntohl(opt.index);

	bpf_trace_printk(fmt, sizeof(fmt),
			 tkey.tunnel_id, tkey.remote_ipv4, erspan_idx);

	return TC_ACT_OK;
}
SEC("vxlan_set_tunnel")
int _vxlan_set_tunnel(struct __sk_buff *skb)
{
......@@ -378,5 +440,4 @@ int _ip6ip6_get_tunnel(struct __sk_buff *skb)
return TC_ACT_OK;
}
char _license[] SEC("license") = "GPL";
......@@ -32,6 +32,19 @@ function add_gre_tunnel {
ip addr add dev $DEV 10.1.1.200/24
}
# Create the two ends of the ERSPAN test tunnel.
# Reads $TYPE, $DEV_NS and $DEV, which the caller sets beforehand.
function add_erspan_tunnel {
# in namespace: a conventionally configured tunnel device with fixed
# key 2, ERSPAN index 123 and endpoints 172.16.1.100 -> 172.16.1.200,
# addressed as 10.1.1.100/24
ip netns exec at_ns0 \
ip link add dev $DEV_NS type $TYPE seq key 2 local 172.16.1.100 remote 172.16.1.200 erspan 123
ip netns exec at_ns0 ip link set dev $DEV_NS up
ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
# out of namespace: the peer device is created with "external", i.e. no
# key/local/remote/index on the command line (presumably the collect_md
# mode under test, with parameters supplied per packet -- confirm against
# the iproute2 documentation); addressed as 10.1.1.200/24
ip link add dev $DEV type $TYPE external
ip link set dev $DEV up
ip addr add dev $DEV 10.1.1.200/24
}
function add_vxlan_tunnel {
# Set static ARP entry here because iptables set-mark works
# on L3 packet, as a result not applying to ARP packets,
......@@ -99,6 +112,18 @@ function test_gre {
cleanup
}
# ERSPAN test case: build the tunnel pair, attach the erspan_set_tunnel /
# erspan_get_tunnel BPF sections to the root-namespace device, then ping in
# both directions across the tunnel.
# config_device, attach_bpf and cleanup are defined elsewhere in this script.
function test_erspan {
# Device type and names consumed by add_erspan_tunnel
TYPE=erspan
DEV_NS=erspan00
DEV=erspan11
config_device
add_erspan_tunnel
attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
# root namespace -> namespace end of the tunnel
ping -c 1 10.1.1.100
# namespace -> root namespace end of the tunnel
ip netns exec at_ns0 ping -c 1 10.1.1.200
cleanup
}
function test_vxlan {
TYPE=vxlan
DEV_NS=vxlan00
......@@ -151,14 +176,18 @@ function cleanup {
ip link del gretap11
ip link del vxlan11
ip link del geneve11
ip link del erspan11
pkill tcpdump
pkill cat
set -ex
}
trap cleanup 0 2 3 6 9
cleanup
echo "Testing GRE tunnel..."
test_gre
echo "Testing ERSPAN tunnel..."
test_erspan
echo "Testing VXLAN tunnel..."
test_vxlan
echo "Testing GENEVE tunnel..."
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment