Commit 8eb517a2 authored by David S. Miller

Merge branch 'reset-mac'

Guillaume Nault says:

====================
net: reset MAC header consistently across L3 virtual devices

Some virtual L3 devices, like vxlan-gpe and gre (in collect_md mode),
reset the MAC header pointer after they parsed the outer headers. This
accurately reflects the fact that the decapsulated packet is a pure L3
packet, as that makes the MAC header 0 bytes long (the MAC and network
header pointers are equal).

However, many L3 devices only adjust the network header after
decapsulation and leave the MAC header pointer at its original value.
This can confuse other parts of the networking stack, like TC, which
then considers the outer headers as one big MAC header.

This patch series makes the following L3 tunnels behave like VXLAN-GPE:
bareudp, ipip, sit, gre, ip6gre, ip6tnl, gtp.

The case of gre is a bit special. It already resets the MAC header
pointer in collect_md mode, so only the classical mode needs to be
adjusted. However, gre also has a special case that expects the MAC
header pointer to keep pointing to the outer header even after
decapsulation. Therefore, patch 4 keeps an exception for this case.

Ideally, we'd centralise the call to skb_reset_mac_header() in
ip_tunnel_rcv(), to avoid manual calls in ipip (patch 2),
sit (patch 3) and gre (patch 4). That's unfortunately not feasible
currently, because of the gre special case discussed above that
precludes us from resetting the MAC header unconditionally.

The original motivation is to redirect bareudp packets to Ethernet
devices (as described in patch 1). The rest of this series aims at
bringing consistency across all L3 devices (apart from gre's special
case unfortunately).

Note: the gtp patch results from pure code inspection and has been
compile-tested only.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents ff8744b5 b2d898c8
...@@ -133,6 +133,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) ...@@ -133,6 +133,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
skb->dev = bareudp->dev; skb->dev = bareudp->dev;
oiph = skb_network_header(skb); oiph = skb_network_header(skb);
skb_reset_network_header(skb); skb_reset_network_header(skb);
skb_reset_mac_header(skb);
if (!IS_ENABLED(CONFIG_IPV6) || family == AF_INET) if (!IS_ENABLED(CONFIG_IPV6) || family == AF_INET)
err = IP_ECN_decapsulate(oiph, skb); err = IP_ECN_decapsulate(oiph, skb);
......
...@@ -201,6 +201,7 @@ static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb, ...@@ -201,6 +201,7 @@ static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb,
* calculate the transport header. * calculate the transport header.
*/ */
skb_reset_network_header(skb); skb_reset_network_header(skb);
skb_reset_mac_header(skb);
skb->dev = pctx->dev; skb->dev = pctx->dev;
......
...@@ -107,6 +107,8 @@ module_param(log_ecn_error, bool, 0644); ...@@ -107,6 +107,8 @@ module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static struct rtnl_link_ops ipgre_link_ops __read_mostly; static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static const struct header_ops ipgre_header_ops;
static int ipgre_tunnel_init(struct net_device *dev); static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb, static void erspan_build_header(struct sk_buff *skb,
u32 id, u32 index, u32 id, u32 index,
...@@ -364,7 +366,10 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, ...@@ -364,7 +366,10 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
raw_proto, false) < 0) raw_proto, false) < 0)
goto drop; goto drop;
if (tunnel->dev->type != ARPHRD_NONE) /* Special case for ipgre_header_parse(), which expects the
* mac_header to point to the outer IP header.
*/
if (tunnel->dev->header_ops == &ipgre_header_ops)
skb_pop_mac_header(skb); skb_pop_mac_header(skb);
else else
skb_reset_mac_header(skb); skb_reset_mac_header(skb);
......
...@@ -242,6 +242,8 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto) ...@@ -242,6 +242,8 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
if (!tun_dst) if (!tun_dst)
return 0; return 0;
} }
skb_reset_mac_header(skb);
return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
} }
......
...@@ -837,6 +837,7 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, ...@@ -837,6 +837,7 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
} else { } else {
skb->dev = tunnel->dev; skb->dev = tunnel->dev;
skb_reset_mac_header(skb);
} }
skb_reset_network_header(skb); skb_reset_network_header(skb);
......
...@@ -710,6 +710,8 @@ static int ipip6_rcv(struct sk_buff *skb) ...@@ -710,6 +710,8 @@ static int ipip6_rcv(struct sk_buff *skb)
* old iph is no longer valid * old iph is no longer valid
*/ */
iph = (const struct iphdr *)skb_mac_header(skb); iph = (const struct iphdr *)skb_mac_header(skb);
skb_reset_mac_header(skb);
err = IP_ECN_decapsulate(iph, skb); err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) { if (unlikely(err)) {
if (log_ecn_error) if (log_ecn_error)
...@@ -780,6 +782,8 @@ static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto) ...@@ -780,6 +782,8 @@ static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
tpi = &ipip_tpi; tpi = &ipip_tpi;
if (iptunnel_pull_header(skb, 0, tpi->proto, false)) if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop; goto drop;
skb_reset_mac_header(skb);
return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error); return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment