Commit 8bf2464c authored by David S. Miller

Merge branch 'Bare-UDP-L3-Encapsulation-Module'

Martin Varghese says:

====================
Bare UDP L3 Encapsulation Module

There are various L3 encapsulation standards using UDP being discussed to
leverage the UDP based load balancing capability of different networks.
MPLSoUDP (https://tools.ietf.org/html/rfc7510) is one among them.

The Bareudp tunnel module provides a generic L3 encapsulation tunnelling
support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside
a UDP tunnel.

Special Handling
----------------
The bareudp device supports special handling for MPLS & IP as they can have
multiple ethertypes.
MPLS protocol can have ethertypes ETH_P_MPLS_UC (unicast) & ETH_P_MPLS_MC (multicast).
IP protocol can have ethertypes ETH_P_IP (v4) & ETH_P_IPV6 (v6).
This special handling can be enabled only for ethertypes ETH_P_IP & ETH_P_MPLS_UC
with a flag called multiproto mode.

Usage
------

1) Device creation & deletion

    a) ip link add dev bareudp0 type bareudp dstport 6635 ethertype 0x8847

       This creates a bareudp tunnel device which tunnels L3 traffic with ethertype
       0x8847 (MPLS traffic). The destination port of the UDP header will be set to
       6635. The device will listen on UDP port 6635 to receive traffic.

    b) ip link delete bareudp0

2) Device creation with multiple proto mode enabled

There are two ways to create a bareudp device for MPLS & IP with multiproto mode
enabled.

    a) ip link add dev  bareudp0 type bareudp dstport 6635 ethertype 0x8847 multiproto

    b) ip link add dev  bareudp0 type bareudp dstport 6635 ethertype mpls

3) Device Usage

The bareudp device could be used along with OVS or flower filter in TC.
The OVS or TC flower layer must set the tunnel information in SKB dst field before
sending packet buffer to the bareudp device for transmission. On reception the
bareudp device extracts and stores the tunnel information in SKB dst field before
passing the packet buffer to the network stack.

Why not FOU ?
------------
FOU by design does L4 encapsulation. It maps a UDP port to an ipproto (the IP protocol number of the L4 protocol).
Bareudp achieves a generic L3 encapsulation. It maps a UDP port to an L3 ethertype.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 48851e9e 4b5f6723
.. SPDX-License-Identifier: GPL-2.0
========================================
Bare UDP Tunnelling Module Documentation
========================================
There are various L3 encapsulation standards using UDP being discussed to
leverage the UDP based load balancing capability of different networks.
MPLSoUDP (https://tools.ietf.org/html/rfc7510) is one among them.
The Bareudp tunnel module provides a generic L3 encapsulation tunnelling
support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside
a UDP tunnel.
Special Handling
----------------
The bareudp device supports special handling for MPLS & IP as they can have
multiple ethertypes.
MPLS protocol can have ethertypes ETH_P_MPLS_UC (unicast) & ETH_P_MPLS_MC (multicast).
IP protocol can have ethertypes ETH_P_IP (v4) & ETH_P_IPV6 (v6).
This special handling can be enabled only for ethertypes ETH_P_IP & ETH_P_MPLS_UC
with a flag called multiproto mode.
Usage
------
1) Device creation & deletion
a) ip link add dev bareudp0 type bareudp dstport 6635 ethertype 0x8847
This creates a bareudp tunnel device which tunnels L3 traffic with ethertype
0x8847 (MPLS traffic). The destination port of the UDP header will be set to
6635. The device will listen on UDP port 6635 to receive traffic.
b) ip link delete bareudp0
2) Device creation with multiple proto mode enabled
There are two ways to create a bareudp device for MPLS & IP with multiproto mode
enabled.
a) ip link add dev bareudp0 type bareudp dstport 6635 ethertype 0x8847 multiproto
b) ip link add dev bareudp0 type bareudp dstport 6635 ethertype mpls
3) Device Usage
The bareudp device could be used along with OVS or flower filter in TC.
The OVS or TC flower layer must set the tunnel information in SKB dst field before
sending packet buffer to the bareudp device for transmission. On reception the
bareudp device extracts and stores the tunnel information in SKB dst field before
passing the packet buffer to the network stack.
......@@ -8,6 +8,7 @@ Contents:
netdev-FAQ
af_xdp
bareudp
batman-adv
can
can_ucan_protocol
......
......@@ -258,6 +258,19 @@ config GENEVE
To compile this driver as a module, choose M here: the module
will be called geneve.
config BAREUDP
tristate "Bare UDP Encapsulation"
depends on INET
depends on IPV6 || !IPV6
select NET_UDP_TUNNEL
select GRO_CELLS
help
This adds a bare UDP tunnel module for tunnelling different
kinds of traffic like MPLS, IP, etc. inside a UDP tunnel.
To compile this driver as a module, choose M here: the module
will be called bareudp.
config GTP
tristate "GPRS Tunneling Protocol datapath (GTP-U)"
depends on INET
......
......@@ -29,6 +29,7 @@ obj-$(CONFIG_VETH) += veth.o
obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
obj-$(CONFIG_VXLAN) += vxlan.o
obj-$(CONFIG_GENEVE) += geneve.o
obj-$(CONFIG_BAREUDP) += bareudp.o
obj-$(CONFIG_GTP) += gtp.o
obj-$(CONFIG_NLMON) += nlmon.o
obj-$(CONFIG_NET_VRF) += vrf.o
......
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NET_BAREUDP_H
#define __NET_BAREUDP_H
#include <linux/types.h>
#include <linux/skbuff.h>
/*
 * Configuration handed to bareudp_dev_create() when a bareudp device is
 * created. The consumer of these fields is not visible in this header, so
 * the per-field notes below are derived from the types and from the usage
 * text ("dstport", "ethertype", "multiproto") and should be confirmed
 * against the driver.
 */
struct bareudp_conf {
/* L3 ethertype carried by the tunnel; big-endian per its __be16 type */
__be16 ethertype;
/* UDP port of the tunnel; big-endian per its __be16 type */
__be16 port;
/* presumably the lower bound of the UDP source port range - TODO confirm */
u16 sport_min;
/* presumably enables the "multiproto" special handling - TODO confirm */
bool multi_proto_mode;
};
/*
 * Create a bareudp device named @name in namespace @net using the settings
 * in @info. The definition is not visible here; the meaning of
 * @name_assign_type and the error-return convention (NULL vs. ERR_PTR)
 * must be checked against the implementation.
 */
struct net_device *bareudp_dev_create(struct net *net, const char *name,
u8 name_assign_type,
struct bareudp_conf *info);
#endif
......@@ -1027,6 +1027,12 @@ struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, st
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst,
bool connected);
/*
 * IPv6 route lookup for a tunnel device: returns a dst entry and stores the
 * source address to use in @saddr, or an ERR_PTR() encoded error
 * (-ENETUNREACH when no route is found, -ELOOP when the route points back
 * at @dev).
 */
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
struct net_device *dev,
struct net *net, struct socket *sock,
struct in6_addr *saddr,
const struct ip_tunnel_info *info,
u8 protocol, bool use_cache);
struct dst_entry *ip6_blackhole_route(struct net *net,
struct dst_entry *orig_dst);
......
......@@ -128,6 +128,12 @@ static inline struct rtable *__ip_route_output_key(struct net *net,
struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
const struct sock *sk);
/*
 * IPv4 route lookup for a tunnel device: returns a route and stores the
 * source address to use in @saddr, or an ERR_PTR() encoded error
 * (-ENETUNREACH when no route is found, -ELOOP when the route points back
 * at @dev).
 */
struct rtable *ip_route_output_tunnel(struct sk_buff *skb,
struct net_device *dev,
struct net *net, __be32 *saddr,
const struct ip_tunnel_info *info,
u8 protocol, bool use_cache);
struct dst_entry *ipv4_blackhole_route(struct net *net,
struct dst_entry *dst_orig);
......
......@@ -590,6 +590,18 @@ enum ifla_geneve_df {
GENEVE_DF_MAX = __GENEVE_DF_END - 1,
};
/* Bareudp section */
enum {
IFLA_BAREUDP_UNSPEC,
IFLA_BAREUDP_PORT,
IFLA_BAREUDP_ETHERTYPE,
IFLA_BAREUDP_SRCPORT_MIN,
IFLA_BAREUDP_MULTIPROTO_MODE,
__IFLA_BAREUDP_MAX
};
#define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1)
/* PPP section */
enum {
IFLA_PPP_UNSPEC,
......
......@@ -2774,6 +2774,54 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
}
EXPORT_SYMBOL_GPL(ip_route_output_flow);
/**
 * ip_route_output_tunnel - perform IPv4 route lookup on tunnel
 * @skb: Packet for which lookup is done (only its mark is read)
 * @dev: Tunnel device, used for debug output and the circular-route check
 * @net: Network namespace in which the lookup is done
 * @saddr: Memory to store the src ip address
 * @info: Tunnel information (addresses, tos and dst cache)
 * @protocol: IP protocol placed in the flow key
 * @use_cache: Flag to enable cache usage (effective only with
 *             CONFIG_DST_CACHE)
 *
 * Builds an IPv4 flow key from @info and @skb->mark and resolves it with
 * ip_route_output_key(), optionally consulting and populating the dst cache
 * embedded in @info.
 *
 * It returns a valid route and stores the src address chosen by the lookup
 * in @saddr on success, else a pointer encoded error code: -ENETUNREACH if
 * the lookup fails (whatever the underlying error was) or -ELOOP if the
 * route's output device is @dev itself.
 */
struct rtable *ip_route_output_tunnel(struct sk_buff *skb,
struct net_device *dev,
struct net *net, __be32 *saddr,
const struct ip_tunnel_info *info,
u8 protocol, bool use_cache)
{
#ifdef CONFIG_DST_CACHE
struct dst_cache *dst_cache;
#endif
struct rtable *rt = NULL;
struct flowi4 fl4;
__u8 tos;
#ifdef CONFIG_DST_CACHE
/* @info is const; the cast drops the qualifier for the cache helpers */
dst_cache = (struct dst_cache *)&info->dst_cache;
if (use_cache) {
/* Cache hit: return early, @saddr is handed to the cache helper */
rt = dst_cache_get_ip4(dst_cache, saddr);
if (rt)
return rt;
}
#endif
/* Cache miss or cache disabled: build the flow key from scratch */
memset(&fl4, 0, sizeof(fl4));
fl4.flowi4_mark = skb->mark;
fl4.flowi4_proto = protocol;
fl4.daddr = info->key.u.ipv4.dst;
fl4.saddr = info->key.u.ipv4.src;
tos = info->key.tos;
fl4.flowi4_tos = RT_TOS(tos);
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt)) {
/* The specific lookup error is not propagated to the caller */
netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr);
return ERR_PTR(-ENETUNREACH);
}
if (rt->dst.dev == dev) { /* is this necessary? */
/* Route points back at the tunnel device: drop the reference, fail */
netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr);
ip_rt_put(rt);
return ERR_PTR(-ELOOP);
}
#ifdef CONFIG_DST_CACHE
if (use_cache)
dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
#endif
/* Report the source address taken from the flow key after the lookup */
*saddr = fl4.saddr;
return rt;
}
EXPORT_SYMBOL_GPL(ip_route_output_tunnel);
/* called with rcu_read_lock held */
static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
struct rtable *rt, u32 table_id, struct flowi4 *fl4,
......
......@@ -54,6 +54,7 @@
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
......@@ -1196,6 +1197,75 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
/**
 * ip6_dst_lookup_tunnel - perform route lookup on tunnel
 * @skb: Packet for which lookup is done
 * @dev: Tunnel device
 * @net: Network namespace of tunnel device
 * @sock: Socket which provides route info
 * @saddr: Memory to store the src ip address
 * @info: Tunnel information
 * @protocol: IP protocol
 * @use_cache: Flag to enable cache usage
 *
 * This function performs a route lookup on a tunnel
 *
 * It returns a valid dst pointer and stores src address to be used in
 * tunnel in param saddr on success, else a pointer encoded error code.
 */
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
struct net_device *dev,
struct net *net,
struct socket *sock,
struct in6_addr *saddr,
const struct ip_tunnel_info *info,
u8 protocol,
bool use_cache)
{
struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
struct dst_cache *dst_cache;
#endif
struct flowi6 fl6;
__u8 prio;
#ifdef CONFIG_DST_CACHE
/* @info is const; the cast drops the qualifier for the cache helpers */
dst_cache = (struct dst_cache *)&info->dst_cache;
if (use_cache) {
/* Cache hit: return early, @saddr is handed to the cache helper */
dst = dst_cache_get_ip6(dst_cache, saddr);
if (dst)
return dst;
}
#endif
/* Cache miss or cache disabled: build the flow key from scratch */
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_mark = skb->mark;
fl6.flowi6_proto = protocol;
fl6.daddr = info->key.u.ipv6.dst;
fl6.saddr = info->key.u.ipv6.src;
prio = info->key.tos;
/*
 * NOTE(review): RT_TOS() is the IPv4 TOS mask; applying it to the IPv6
 * traffic class before building the flow info may discard bits that
 * should take part in the lookup - confirm whether plain prio is meant.
 */
fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
info->key.label);
/* @sock is dereferenced unconditionally here, so it must not be NULL */
dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
NULL);
if (IS_ERR(dst)) {
/* The specific lookup error is not propagated to the caller */
netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
return ERR_PTR(-ENETUNREACH);
}
if (dst->dev == dev) { /* is this necessary? */
/* Route points back at the tunnel device: drop the reference, fail */
netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
dst_release(dst);
return ERR_PTR(-ELOOP);
}
#ifdef CONFIG_DST_CACHE
if (use_cache)
dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
/* Report the source address taken from the flow key after the lookup */
*saddr = fl6.saddr;
return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
gfp_t gfp)
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment