Commit 172bf009 authored by Steffen Klassert's avatar Steffen Klassert

xfrm: Support GRO for IPv4 ESP in UDP encapsulation

This patch enables the GRO codepath for IPv4 ESP in UDP encapsulated
packets. Decapsulation happens at L2 and saves a full round through
the stack for each packet. This is also needed to support HW offload
for ESP in UDP encapsulation.

Enabling this would improve performance for the ESP in UDP datapath, i.e.
IPsec with NAT in between.

By default GRO for ESP-in-UDP is disabled for UDP sockets.
To enable this feature for an ESP socket, the following two options
need to be set:
1. enable ESP-in-UDP: (this is already set by an IKE daemon).
   int type = UDP_ENCAP_ESPINUDP;
   setsockopt(fd, SOL_UDP, UDP_ENCAP, &type, sizeof(type));

2. To enable GRO for ESP in UDP socket:
   type = true;
   setsockopt(fd, SOL_UDP, UDP_GRO, &type, sizeof(type));

Enabling ESP-in-UDP has the side effect of preventing the Linux stack from
seeing ESP packets at L3 (when ESP offload is disabled), as packets are
immediately decapsulated from UDP and decrypted.
This change may affect nftables rules that match on ESP packets at L3.
Also, tcpdump won't see the ESP packets.

Developers/admins are advised to review and adapt any nftables rules
accordingly before enabling this feature to prevent potential rule breakage.
Also, tcpdump will not see ESP packets from an ESP-in-UDP flow when this
is enabled.
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
parent b439475a
...@@ -41,7 +41,7 @@ struct napi_gro_cb { ...@@ -41,7 +41,7 @@ struct napi_gro_cb {
/* Number of segments aggregated. */ /* Number of segments aggregated. */
u16 count; u16 count;
/* Used in ipv6_gro_receive() and foo-over-udp */ /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
u16 proto; u16 proto;
/* Used in napi_gro_cb::free */ /* Used in napi_gro_cb::free */
......
...@@ -1710,6 +1710,8 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb); ...@@ -1710,6 +1710,8 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu); void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
struct sk_buff *skb);
int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
int optlen); int optlen);
#else #else
......
...@@ -33,6 +33,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, ...@@ -33,6 +33,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
int offset = skb_gro_offset(skb); int offset = skb_gro_offset(skb);
struct xfrm_offload *xo; struct xfrm_offload *xo;
struct xfrm_state *x; struct xfrm_state *x;
int encap_type = 0;
__be32 seq; __be32 seq;
__be32 spi; __be32 spi;
...@@ -70,6 +71,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, ...@@ -70,6 +71,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
xo->flags |= XFRM_GRO; xo->flags |= XFRM_GRO;
if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
encap_type = UDP_ENCAP_ESPINUDP;
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
XFRM_SPI_SKB_CB(skb)->family = AF_INET; XFRM_SPI_SKB_CB(skb)->family = AF_INET;
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
...@@ -77,7 +81,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, ...@@ -77,7 +81,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
/* We don't need to handle errors from xfrm_input, it does all /* We don't need to handle errors from xfrm_input, it does all
* the error handling and frees the resources on error. */ * the error handling and frees the resources on error. */
xfrm_input(skb, IPPROTO_ESP, spi, 0); xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
return ERR_PTR(-EINPROGRESS); return ERR_PTR(-EINPROGRESS);
out_reset: out_reset:
......
...@@ -2625,6 +2625,17 @@ void udp_destroy_sock(struct sock *sk) ...@@ -2625,6 +2625,17 @@ void udp_destroy_sock(struct sock *sk)
} }
} }
/*
 * Install the ESP-in-UDP GRO receive hook on a UDP socket.
 *
 * The hook is only written when the socket has the GRO_ENABLED bit set,
 * @encap_type is UDP_ENCAP_ESPINUDP and @family is AF_INET; in every other
 * case the socket is left untouched. Without CONFIG_XFRM this is a no-op.
 */
static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
				       struct sock *sk)
{
#ifdef CONFIG_XFRM
	/* Both the GRO bit and the ESP-in-UDP encap type are required. */
	if (!udp_test_bit(GRO_ENABLED, sk) ||
	    encap_type != UDP_ENCAP_ESPINUDP)
		return;

	/* Only the IPv4 handler is wired up here. */
	if (family != AF_INET)
		return;

	WRITE_ONCE(udp_sk(sk)->gro_receive, xfrm4_gro_udp_encap_rcv);
#endif
}
/* /*
* Socket option code for UDP * Socket option code for UDP
*/ */
...@@ -2674,6 +2685,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, ...@@ -2674,6 +2685,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
case 0: case 0:
#ifdef CONFIG_XFRM #ifdef CONFIG_XFRM
case UDP_ENCAP_ESPINUDP: case UDP_ENCAP_ESPINUDP:
set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, sk);
fallthrough;
case UDP_ENCAP_ESPINUDP_NON_IKE: case UDP_ENCAP_ESPINUDP_NON_IKE:
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6) if (sk->sk_family == AF_INET6)
...@@ -2716,6 +2729,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, ...@@ -2716,6 +2729,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
udp_tunnel_encap_enable(sk); udp_tunnel_encap_enable(sk);
udp_assign_bit(GRO_ENABLED, sk, valbool); udp_assign_bit(GRO_ENABLED, sk, valbool);
udp_assign_bit(ACCEPT_L4, sk, valbool); udp_assign_bit(ACCEPT_L4, sk, valbool);
set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, sk);
break; break;
/* /*
......
...@@ -17,6 +17,8 @@ ...@@ -17,6 +17,8 @@
#include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv4.h>
#include <net/ip.h> #include <net/ip.h>
#include <net/xfrm.h> #include <net/xfrm.h>
#include <net/protocol.h>
#include <net/gro.h>
static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk, static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
struct sk_buff *skb) struct sk_buff *skb)
...@@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) ...@@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
return 0; return 0;
} }
/* If it's a keepalive packet, then just eat it. static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
* If it's an encapsulated packet, then pass it to the
* IPsec xfrm input.
* Returns 0 if skb passed to xfrm or was dropped.
* Returns >0 if skb should be passed to UDP.
* Returns <0 if skb should be resubmitted (-ret is protocol)
*/
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
{ {
struct udp_sock *up = udp_sk(sk); struct udp_sock *up = udp_sk(sk);
struct udphdr *uh; struct udphdr *uh;
...@@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) ...@@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
case UDP_ENCAP_ESPINUDP: case UDP_ENCAP_ESPINUDP:
/* Check if this is a keepalive packet. If so, eat it. */ /* Check if this is a keepalive packet. If so, eat it. */
if (len == 1 && udpdata[0] == 0xff) { if (len == 1 && udpdata[0] == 0xff) {
goto drop; return -EINVAL;
} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
/* ESP Packet without Non-ESP header */ /* ESP Packet without Non-ESP header */
len = sizeof(struct udphdr); len = sizeof(struct udphdr);
...@@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) ...@@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
case UDP_ENCAP_ESPINUDP_NON_IKE: case UDP_ENCAP_ESPINUDP_NON_IKE:
/* Check if this is a keepalive packet. If so, eat it. */ /* Check if this is a keepalive packet. If so, eat it. */
if (len == 1 && udpdata[0] == 0xff) { if (len == 1 && udpdata[0] == 0xff) {
goto drop; return -EINVAL;
} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
udpdata32[0] == 0 && udpdata32[1] == 0) { udpdata32[0] == 0 && udpdata32[1] == 0) {
...@@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) ...@@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
* protocol to ESP, and then call into the transform receiver. * protocol to ESP, and then call into the transform receiver.
*/ */
if (skb_unclone(skb, GFP_ATOMIC)) if (skb_unclone(skb, GFP_ATOMIC))
goto drop; return -EINVAL;
/* Now we can update and verify the packet length... */ /* Now we can update and verify the packet length... */
iph = ip_hdr(skb); iph = ip_hdr(skb);
...@@ -147,25 +142,88 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) ...@@ -147,25 +142,88 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
iph->tot_len = htons(ntohs(iph->tot_len) - len); iph->tot_len = htons(ntohs(iph->tot_len) - len);
if (skb->len < iphlen + len) { if (skb->len < iphlen + len) {
/* packet is too small!?! */ /* packet is too small!?! */
goto drop; return -EINVAL;
} }
/* pull the data buffer up to the ESP header and set the /* pull the data buffer up to the ESP header and set the
* transport header to point to ESP. Keep UDP on the stack * transport header to point to ESP. Keep UDP on the stack
* for later. * for later.
*/ */
__skb_pull(skb, len); if (pull) {
skb_reset_transport_header(skb); __skb_pull(skb, len);
skb_reset_transport_header(skb);
} else {
skb_set_transport_header(skb, len);
}
/* process ESP */ /* process ESP */
return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
drop:
kfree_skb(skb);
return 0; return 0;
} }
EXPORT_SYMBOL(xfrm4_udp_encap_rcv); EXPORT_SYMBOL(xfrm4_udp_encap_rcv);
/* UDP encap receive hook for IPv4 ESP-in-UDP.
 *
 * Keepalive packets are eaten; encapsulated ESP packets are handed to
 * the IPsec xfrm input path.
 *
 * Returns 0 if the skb was passed to xfrm or was dropped.
 * Returns >0 if the skb should be passed to UDP.
 * Returns <0 if the skb should be resubmitted (-ret is protocol).
 */
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
{
	int status = __xfrm4_udp_encap_rcv(sk, skb, true);

	/* Positive status from the helper is handed back unchanged. */
	if (status > 0)
		return status;

	/* The helper does not free the skb on failure; drop it here. */
	if (status < 0) {
		kfree_skb(skb);
		return 0;
	}

	/* Headers were stripped successfully: process ESP. */
	return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, udp_sk(sk)->encap_type);
}
/*
 * GRO receive handler for IPv4 ESP-in-UDP sockets.
 *
 * Validates the UDP encapsulation with __xfrm4_udp_encap_rcv() (in its
 * non-pulling mode) and, on success, hands the skb to the gro_receive
 * callback registered in inet_offloads[IPPROTO_ESP].
 *
 * Returns whatever the ESP gro_receive callback returns on the success
 * path. Returns NULL if the initial pskb_pull() fails, and also NULL, with
 * the skb marked flush = 1 and same_flow = 0, when no ESP GRO callback is
 * registered or the encap check returns non-zero.
 */
struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
struct sk_buff *skb)
{
int offset = skb_gro_offset(skb);
const struct net_offload *ops;
struct sk_buff *pp = NULL;
int ret;
/* NOTE(review): presumably the GRO offset points just past the UDP header
 * here, so pulling (offset - udphdr) leaves skb->data at the UDP header —
 * confirm against the UDP GRO caller.
 */
offset = offset - sizeof(struct udphdr);
if (!pskb_pull(skb, offset))
return NULL;
/* The offload table entry is looked up and used under rcu_read_lock(). */
rcu_read_lock();
ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
if (!ops || !ops->callbacks.gro_receive)
goto out;
/* pull == false: the helper sets the transport header instead of
 * pulling the encapsulation off the skb.
 */
ret = __xfrm4_udp_encap_rcv(sk, skb, false);
if (ret)
goto out;
/* Undo the pull done above before calling the ESP GRO callback. */
skb_push(skb, offset);
/* esp4_gro_receive() checks this to select UDP_ENCAP_ESPINUDP. */
NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
rcu_read_unlock();
return pp;
out:
/* Failure path: undo the pull and mark the skb flush / not same-flow. */
rcu_read_unlock();
skb_push(skb, offset);
NAPI_GRO_CB(skb)->same_flow = 0;
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
}
int xfrm4_rcv(struct sk_buff *skb) int xfrm4_rcv(struct sk_buff *skb)
{ {
return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0); return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment