Commit e0f9f0e0 authored by Jakub Kicinski's avatar Jakub Kicinski

Merge tag 'ipsec-next-2023-10-28' of...

Merge tag 'ipsec-next-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next

Steffen Klassert says:

====================
pull request (net-next): ipsec-next 2023-10-28

1) Remove unused function declarations of xfrm4_extract_input and
   xfrm6_extract_input. From Yue Haibing.

2) Annotate struct xfrm_sec_ctx with __counted_by.
   From Kees Cook.

3) Support GRO decapsulation for ESP in UDP encapsulation.
   From Antony Antony et all.

4) Replace the xfrm session decode with flow dissector.
   From Florian Westphal.

5) Fix a use after free in __xfrm6_udp_encap_rcv.

6) Fix the layer 4 flowi decoding.
   From Florian Westphal.

* tag 'ipsec-next-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next:
  xfrm: policy: fix layer 4 flowi decoding
  xfrm Fix use after free in __xfrm6_udp_encap_rcv.
  xfrm: policy: replace session decode with flow dissector
  xfrm: move mark and oif flowi decode into common code
  xfrm: pass struct net to xfrm_decode_session wrappers
  xfrm: Support GRO for IPv6 ESP in UDP encapsulation
  xfrm: Support GRO for IPv4 ESP in UDP encapsulation
  xfrm: Use the XFRM_GRO to indicate a GRO call on input
  xfrm: Annotate struct xfrm_sec_ctx with __counted_by
  xfrm: Remove unused function declarations
====================

Link: https://lore.kernel.org/r/20231028084328.3119236-1-steffen.klassert@secunet.comSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 55c90047 eefed766
......@@ -41,7 +41,7 @@ struct napi_gro_cb {
/* Number of segments aggregated. */
u16 count;
/* Used in ipv6_gro_receive() and foo-over-udp */
/* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
u16 proto;
/* Used in napi_gro_cb::free */
......
......@@ -60,6 +60,9 @@ struct ipv6_stub {
#if IS_ENABLED(CONFIG_XFRM)
void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu);
int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
struct sk_buff *(*xfrm6_gro_udp_encap_rcv)(struct sock *sk,
struct list_head *head,
struct sk_buff *skb);
int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi,
int encap_type);
#endif
......
......@@ -1207,20 +1207,20 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir,
return __xfrm_policy_check2(sk, dir, skb, AF_INET6, 1);
}
int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
unsigned int family, int reverse);
static inline int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
static inline int xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
unsigned int family)
{
return __xfrm_decode_session(skb, fl, family, 0);
return __xfrm_decode_session(net, skb, fl, family, 0);
}
static inline int xfrm_decode_session_reverse(struct sk_buff *skb,
static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb,
struct flowi *fl,
unsigned int family)
{
return __xfrm_decode_session(skb, fl, family, 1);
return __xfrm_decode_session(net, skb, fl, family, 1);
}
int __xfrm_route_forward(struct sk_buff *skb, unsigned short family);
......@@ -1296,7 +1296,7 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk
{
return 1;
}
static inline int xfrm_decode_session_reverse(struct sk_buff *skb,
static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb,
struct flowi *fl,
unsigned int family)
{
......@@ -1669,7 +1669,6 @@ int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb);
#endif
void xfrm_local_error(struct sk_buff *skb, int mtu);
int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
int encap_type);
int xfrm4_transport_finish(struct sk_buff *skb, int async);
......@@ -1689,7 +1688,6 @@ int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char prot
int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family);
int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family);
void xfrm4_local_error(struct sk_buff *skb, u32 mtu);
int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb);
int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
struct ip6_tnl *t);
int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
......@@ -1712,6 +1710,10 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
struct sk_buff *skb);
struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
struct sk_buff *skb);
int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
int optlen);
#else
......
......@@ -4,6 +4,7 @@
#include <linux/in6.h>
#include <linux/types.h>
#include <linux/stddef.h>
/* All of the structures in this file may not change size as they are
* passed into the kernel from userspace via netlink sockets.
......@@ -33,7 +34,7 @@ struct xfrm_sec_ctx {
__u8 ctx_alg;
__u16 ctx_len;
__u32 ctx_sid;
char ctx_str[];
char ctx_str[] __counted_by(ctx_len);
};
/* Security Context Domains of Interpretation */
......
......@@ -33,6 +33,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
int offset = skb_gro_offset(skb);
struct xfrm_offload *xo;
struct xfrm_state *x;
int encap_type = 0;
__be32 seq;
__be32 spi;
......@@ -70,6 +71,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
xo->flags |= XFRM_GRO;
if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
encap_type = UDP_ENCAP_ESPINUDP;
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
XFRM_SPI_SKB_CB(skb)->family = AF_INET;
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
......@@ -77,7 +81,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
/* We don't need to handle errors from xfrm_input, it does all
* the error handling and frees the resources on error. */
xfrm_input(skb, IPPROTO_ESP, spi, -2);
xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
return ERR_PTR(-EINPROGRESS);
out_reset:
......
......@@ -517,7 +517,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
} else
return rt;
err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
err = xfrm_decode_session_reverse(net, skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
if (err)
goto relookup_failed;
......
......@@ -288,11 +288,11 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
switch (skb->protocol) {
case htons(ETH_P_IP):
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
xfrm_decode_session(skb, &fl, AF_INET);
xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET);
break;
case htons(ETH_P_IPV6):
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
xfrm_decode_session(skb, &fl, AF_INET6);
xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6);
break;
default:
goto tx_err;
......
......@@ -62,7 +62,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
#ifdef CONFIG_XFRM
if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
xfrm_decode_session(net, skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
struct dst_entry *dst = skb_dst(skb);
skb_dst_set(skb, NULL);
dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0);
......
......@@ -2630,6 +2630,19 @@ void udp_destroy_sock(struct sock *sk)
}
}
static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
struct sock *sk)
{
#ifdef CONFIG_XFRM
if (udp_test_bit(GRO_ENABLED, sk) && encap_type == UDP_ENCAP_ESPINUDP) {
if (family == AF_INET)
WRITE_ONCE(udp_sk(sk)->gro_receive, xfrm4_gro_udp_encap_rcv);
else if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6)
WRITE_ONCE(udp_sk(sk)->gro_receive, ipv6_stub->xfrm6_gro_udp_encap_rcv);
}
#endif
}
/*
* Socket option code for UDP
*/
......@@ -2679,6 +2692,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
case 0:
#ifdef CONFIG_XFRM
case UDP_ENCAP_ESPINUDP:
set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, sk);
fallthrough;
case UDP_ENCAP_ESPINUDP_NON_IKE:
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6)
......@@ -2721,6 +2736,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
udp_tunnel_encap_enable(sk);
udp_assign_bit(GRO_ENABLED, sk, valbool);
udp_assign_bit(ACCEPT_L4, sk, valbool);
set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, sk);
break;
/*
......
......@@ -17,6 +17,8 @@
#include <linux/netfilter_ipv4.h>
#include <net/ip.h>
#include <net/xfrm.h>
#include <net/protocol.h>
#include <net/gro.h>
static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
struct sk_buff *skb)
......@@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
return 0;
}
/* If it's a keepalive packet, then just eat it.
* If it's an encapsulated packet, then pass it to the
* IPsec xfrm input.
* Returns 0 if skb passed to xfrm or was dropped.
* Returns >0 if skb should be passed to UDP.
* Returns <0 if skb should be resubmitted (-ret is protocol)
*/
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
{
struct udp_sock *up = udp_sk(sk);
struct udphdr *uh;
......@@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
case UDP_ENCAP_ESPINUDP:
/* Check if this is a keepalive packet. If so, eat it. */
if (len == 1 && udpdata[0] == 0xff) {
goto drop;
return -EINVAL;
} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
/* ESP Packet without Non-ESP header */
len = sizeof(struct udphdr);
......@@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
case UDP_ENCAP_ESPINUDP_NON_IKE:
/* Check if this is a keepalive packet. If so, eat it. */
if (len == 1 && udpdata[0] == 0xff) {
goto drop;
return -EINVAL;
} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
udpdata32[0] == 0 && udpdata32[1] == 0) {
......@@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
* protocol to ESP, and then call into the transform receiver.
*/
if (skb_unclone(skb, GFP_ATOMIC))
goto drop;
return -EINVAL;
/* Now we can update and verify the packet length... */
iph = ip_hdr(skb);
......@@ -147,25 +142,89 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
iph->tot_len = htons(ntohs(iph->tot_len) - len);
if (skb->len < iphlen + len) {
/* packet is too small!?! */
goto drop;
return -EINVAL;
}
/* pull the data buffer up to the ESP header and set the
* transport header to point to ESP. Keep UDP on the stack
* for later.
*/
__skb_pull(skb, len);
skb_reset_transport_header(skb);
if (pull) {
__skb_pull(skb, len);
skb_reset_transport_header(skb);
} else {
skb_set_transport_header(skb, len);
}
/* process ESP */
return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
drop:
kfree_skb(skb);
return 0;
}
/* If it's a keepalive packet, then just eat it.
* If it's an encapsulated packet, then pass it to the
* IPsec xfrm input.
* Returns 0 if skb passed to xfrm or was dropped.
* Returns >0 if skb should be passed to UDP.
* Returns <0 if skb should be resubmitted (-ret is protocol)
*/
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
{
int ret;
ret = __xfrm4_udp_encap_rcv(sk, skb, true);
if (!ret)
return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
udp_sk(sk)->encap_type);
if (ret < 0) {
kfree_skb(skb);
return 0;
}
return ret;
}
EXPORT_SYMBOL(xfrm4_udp_encap_rcv);
struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
struct sk_buff *skb)
{
int offset = skb_gro_offset(skb);
const struct net_offload *ops;
struct sk_buff *pp = NULL;
int ret;
offset = offset - sizeof(struct udphdr);
if (!pskb_pull(skb, offset))
return NULL;
rcu_read_lock();
ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
if (!ops || !ops->callbacks.gro_receive)
goto out;
ret = __xfrm4_udp_encap_rcv(sk, skb, false);
if (ret)
goto out;
skb_push(skb, offset);
NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
rcu_read_unlock();
return pp;
out:
rcu_read_unlock();
skb_push(skb, offset);
NAPI_GRO_CB(skb)->same_flow = 0;
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
}
EXPORT_SYMBOL(xfrm4_gro_udp_encap_rcv);
int xfrm4_rcv(struct sk_buff *skb)
{
return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
......
......@@ -1050,6 +1050,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
#if IS_ENABLED(CONFIG_XFRM)
.xfrm6_local_rxpmtu = xfrm6_local_rxpmtu,
.xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv,
.xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv,
.xfrm6_rcv_encap = xfrm6_rcv_encap,
#endif
.nd_tbl = &nd_tbl,
......
......@@ -34,7 +34,9 @@ static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
int off = sizeof(struct ipv6hdr);
struct ipv6_opt_hdr *exthdr;
if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP))
/* ESP or ESPINUDP */
if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP ||
ipv6_hdr->nexthdr == NEXTHDR_UDP))
return offsetof(struct ipv6hdr, nexthdr);
while (off < nhlen) {
......@@ -54,10 +56,14 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
int offset = skb_gro_offset(skb);
struct xfrm_offload *xo;
struct xfrm_state *x;
int encap_type = 0;
__be32 seq;
__be32 spi;
int nhoff;
if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
encap_type = UDP_ENCAP_ESPINUDP;
if (!pskb_pull(skb, offset))
return NULL;
......@@ -104,7 +110,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
/* We don't need to handle errors from xfrm_input, it does all
* the error handling and frees the resources on error. */
xfrm_input(skb, IPPROTO_ESP, spi, -2);
xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
return ERR_PTR(-EINPROGRESS);
out_reset:
......
......@@ -385,7 +385,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net,
return dst;
}
err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6);
if (err)
goto relookup_failed;
......
......@@ -569,11 +569,11 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_err;
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
xfrm_decode_session(skb, &fl, AF_INET6);
xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6);
break;
case htons(ETH_P_IP):
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
xfrm_decode_session(skb, &fl, AF_INET);
xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET);
break;
default:
goto tx_err;
......
......@@ -61,7 +61,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
#ifdef CONFIG_XFRM
if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
xfrm_decode_session(net, skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
skb_dst_set(skb, NULL);
dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
if (IS_ERR(dst))
......
......@@ -16,6 +16,8 @@
#include <linux/netfilter_ipv6.h>
#include <net/ipv6.h>
#include <net/xfrm.h>
#include <net/protocol.h>
#include <net/gro.h>
int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
struct ip6_tnl *t)
......@@ -67,14 +69,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
return 0;
}
/* If it's a keepalive packet, then just eat it.
* If it's an encapsulated packet, then pass it to the
* IPsec xfrm input.
* Returns 0 if skb passed to xfrm or was dropped.
* Returns >0 if skb should be passed to UDP.
* Returns <0 if skb should be resubmitted (-ret is protocol)
*/
int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
{
struct udp_sock *up = udp_sk(sk);
struct udphdr *uh;
......@@ -85,9 +80,6 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
__be32 *udpdata32;
u16 encap_type;
if (skb->protocol == htons(ETH_P_IP))
return xfrm4_udp_encap_rcv(sk, skb);
encap_type = READ_ONCE(up->encap_type);
/* if this is not encapsulated socket, then just return now */
if (!encap_type)
......@@ -109,7 +101,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
case UDP_ENCAP_ESPINUDP:
/* Check if this is a keepalive packet. If so, eat it. */
if (len == 1 && udpdata[0] == 0xff) {
goto drop;
return -EINVAL;
} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
/* ESP Packet without Non-ESP header */
len = sizeof(struct udphdr);
......@@ -120,7 +112,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
case UDP_ENCAP_ESPINUDP_NON_IKE:
/* Check if this is a keepalive packet. If so, eat it. */
if (len == 1 && udpdata[0] == 0xff) {
goto drop;
return -EINVAL;
} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
udpdata32[0] == 0 && udpdata32[1] == 0) {
......@@ -138,31 +130,100 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
* protocol to ESP, and then call into the transform receiver.
*/
if (skb_unclone(skb, GFP_ATOMIC))
goto drop;
return -EINVAL;
/* Now we can update and verify the packet length... */
ip6h = ipv6_hdr(skb);
ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len);
if (skb->len < ip6hlen + len) {
/* packet is too small!?! */
goto drop;
return -EINVAL;
}
/* pull the data buffer up to the ESP header and set the
* transport header to point to ESP. Keep UDP on the stack
* for later.
*/
__skb_pull(skb, len);
skb_reset_transport_header(skb);
if (pull) {
__skb_pull(skb, len);
skb_reset_transport_header(skb);
} else {
skb_set_transport_header(skb, len);
}
/* process ESP */
return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
drop:
kfree_skb(skb);
return 0;
}
/* If it's a keepalive packet, then just eat it.
* If it's an encapsulated packet, then pass it to the
* IPsec xfrm input.
* Returns 0 if skb passed to xfrm or was dropped.
* Returns >0 if skb should be passed to UDP.
* Returns <0 if skb should be resubmitted (-ret is protocol)
*/
int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
{
int ret;
if (skb->protocol == htons(ETH_P_IP))
return xfrm4_udp_encap_rcv(sk, skb);
ret = __xfrm6_udp_encap_rcv(sk, skb, true);
if (!ret)
return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0,
udp_sk(sk)->encap_type);
if (ret < 0) {
kfree_skb(skb);
return 0;
}
return ret;
}
struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
struct sk_buff *skb)
{
int offset = skb_gro_offset(skb);
const struct net_offload *ops;
struct sk_buff *pp = NULL;
int ret;
if (skb->protocol == htons(ETH_P_IP))
return xfrm4_gro_udp_encap_rcv(sk, head, skb);
offset = offset - sizeof(struct udphdr);
if (!pskb_pull(skb, offset))
return NULL;
rcu_read_lock();
ops = rcu_dereference(inet6_offloads[IPPROTO_ESP]);
if (!ops || !ops->callbacks.gro_receive)
goto out;
ret = __xfrm6_udp_encap_rcv(sk, skb, false);
if (ret)
goto out;
skb_push(skb, offset);
NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
rcu_read_unlock();
return pp;
out:
rcu_read_unlock();
skb_push(skb, offset);
NAPI_GRO_CB(skb)->same_flow = 0;
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
}
int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t)
{
return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
......
......@@ -668,7 +668,7 @@ static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int
struct flowi fl;
int err;
err = xfrm_decode_session(skb, &fl, family);
err = xfrm_decode_session(net, skb, &fl, family);
if (err < 0)
return err;
......
......@@ -462,7 +462,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
struct xfrm_offload *xo = xfrm_offload(skb);
struct sec_path *sp;
if (encap_type < 0) {
if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) {
x = xfrm_input_state(skb);
if (unlikely(x->km.state != XFRM_STATE_VALID)) {
......@@ -485,9 +485,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
seq = XFRM_SKB_CB(skb)->seq.input.low;
goto resume;
}
/* encap_type < -1 indicates a GRO call. */
encap_type = 0;
/* GRO call */
seq = XFRM_SPI_SKB_CB(skb)->seq;
if (xo && (xo->flags & CRYPTO_DONE)) {
......
......@@ -536,7 +536,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
switch (skb->protocol) {
case htons(ETH_P_IPV6):
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
xfrm_decode_session(skb, &fl, AF_INET6);
xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6);
if (!dst) {
fl.u.ip6.flowi6_oif = dev->ifindex;
fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
......@@ -551,7 +551,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
break;
case htons(ETH_P_IP):
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
xfrm_decode_session(skb, &fl, AF_INET);
xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET);
if (!dst) {
struct rtable *rt;
......
......@@ -149,6 +149,21 @@ struct xfrm_pol_inexact_candidates {
struct hlist_head *res[XFRM_POL_CAND_MAX];
};
struct xfrm_flow_keys {
struct flow_dissector_key_basic basic;
struct flow_dissector_key_control control;
union {
struct flow_dissector_key_ipv4_addrs ipv4;
struct flow_dissector_key_ipv6_addrs ipv6;
} addrs;
struct flow_dissector_key_ip ip;
struct flow_dissector_key_icmp icmp;
struct flow_dissector_key_ports ports;
struct flow_dissector_key_keyid gre;
};
static struct flow_dissector xfrm_session_dissector __ro_after_init;
static DEFINE_SPINLOCK(xfrm_if_cb_lock);
static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;
......@@ -2858,7 +2873,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
/* Fixup the mark to support VTI. */
skb_mark = skb->mark;
skb->mark = pol->mark.v;
xfrm_decode_session(skb, &fl, dst->ops->family);
xfrm_decode_session(net, skb, &fl, dst->ops->family);
skb->mark = skb_mark;
spin_unlock(&pq->hold_queue.lock);
......@@ -2894,7 +2909,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
/* Fixup the mark to support VTI. */
skb_mark = skb->mark;
skb->mark = pol->mark.v;
xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
xfrm_decode_session(net, skb, &fl, skb_dst(skb)->ops->family);
skb->mark = skb_mark;
dst_hold(xfrm_dst_path(skb_dst(skb)));
......@@ -3372,209 +3387,106 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star
}
static void
decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse)
decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse)
{
const struct iphdr *iph = ip_hdr(skb);
int ihl = iph->ihl;
u8 *xprth = skb_network_header(skb) + ihl * 4;
struct flowi4 *fl4 = &fl->u.ip4;
int oif = 0;
if (skb_dst(skb) && skb_dst(skb)->dev)
oif = skb_dst(skb)->dev->ifindex;
memset(fl4, 0, sizeof(struct flowi4));
fl4->flowi4_mark = skb->mark;
fl4->flowi4_oif = reverse ? skb->skb_iif : oif;
fl4->flowi4_proto = iph->protocol;
fl4->daddr = reverse ? iph->saddr : iph->daddr;
fl4->saddr = reverse ? iph->daddr : iph->saddr;
fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK;
if (!ip_is_fragment(iph)) {
switch (iph->protocol) {
case IPPROTO_UDP:
case IPPROTO_UDPLITE:
case IPPROTO_TCP:
case IPPROTO_SCTP:
case IPPROTO_DCCP:
if (xprth + 4 < skb->data ||
pskb_may_pull(skb, xprth + 4 - skb->data)) {
__be16 *ports;
xprth = skb_network_header(skb) + ihl * 4;
ports = (__be16 *)xprth;
fl4->fl4_sport = ports[!!reverse];
fl4->fl4_dport = ports[!reverse];
}
break;
case IPPROTO_ICMP:
if (xprth + 2 < skb->data ||
pskb_may_pull(skb, xprth + 2 - skb->data)) {
u8 *icmp;
xprth = skb_network_header(skb) + ihl * 4;
icmp = xprth;
if (reverse) {
fl4->saddr = flkeys->addrs.ipv4.dst;
fl4->daddr = flkeys->addrs.ipv4.src;
fl4->fl4_sport = flkeys->ports.dst;
fl4->fl4_dport = flkeys->ports.src;
} else {
fl4->saddr = flkeys->addrs.ipv4.src;
fl4->daddr = flkeys->addrs.ipv4.dst;
fl4->fl4_sport = flkeys->ports.src;
fl4->fl4_dport = flkeys->ports.dst;
}
fl4->fl4_icmp_type = icmp[0];
fl4->fl4_icmp_code = icmp[1];
}
break;
case IPPROTO_GRE:
if (xprth + 12 < skb->data ||
pskb_may_pull(skb, xprth + 12 - skb->data)) {
__be16 *greflags;
__be32 *gre_hdr;
xprth = skb_network_header(skb) + ihl * 4;
greflags = (__be16 *)xprth;
gre_hdr = (__be32 *)xprth;
if (greflags[0] & GRE_KEY) {
if (greflags[0] & GRE_CSUM)
gre_hdr++;
fl4->fl4_gre_key = gre_hdr[1];
}
}
break;
default:
break;
}
switch (flkeys->basic.ip_proto) {
case IPPROTO_GRE:
fl4->fl4_gre_key = flkeys->gre.keyid;
break;
case IPPROTO_ICMP:
fl4->fl4_icmp_type = flkeys->icmp.type;
fl4->fl4_icmp_code = flkeys->icmp.code;
break;
}
fl4->flowi4_proto = flkeys->basic.ip_proto;
fl4->flowi4_tos = flkeys->ip.tos;
}
#if IS_ENABLED(CONFIG_IPV6)
static void
decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse)
decode_session6(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse)
{
struct flowi6 *fl6 = &fl->u.ip6;
int onlyproto = 0;
const struct ipv6hdr *hdr = ipv6_hdr(skb);
u32 offset = sizeof(*hdr);
struct ipv6_opt_hdr *exthdr;
const unsigned char *nh = skb_network_header(skb);
u16 nhoff = IP6CB(skb)->nhoff;
int oif = 0;
u8 nexthdr;
if (!nhoff)
nhoff = offsetof(struct ipv6hdr, nexthdr);
nexthdr = nh[nhoff];
if (skb_dst(skb) && skb_dst(skb)->dev)
oif = skb_dst(skb)->dev->ifindex;
memset(fl6, 0, sizeof(struct flowi6));
fl6->flowi6_mark = skb->mark;
fl6->flowi6_oif = reverse ? skb->skb_iif : oif;
fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
while (nh + offset + sizeof(*exthdr) < skb->data ||
pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) {
nh = skb_network_header(skb);
exthdr = (struct ipv6_opt_hdr *)(nh + offset);
switch (nexthdr) {
case NEXTHDR_FRAGMENT:
onlyproto = 1;
fallthrough;
case NEXTHDR_ROUTING:
case NEXTHDR_HOP:
case NEXTHDR_DEST:
offset += ipv6_optlen(exthdr);
nexthdr = exthdr->nexthdr;
break;
case IPPROTO_UDP:
case IPPROTO_UDPLITE:
case IPPROTO_TCP:
case IPPROTO_SCTP:
case IPPROTO_DCCP:
if (!onlyproto && (nh + offset + 4 < skb->data ||
pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
__be16 *ports;
nh = skb_network_header(skb);
ports = (__be16 *)(nh + offset);
fl6->fl6_sport = ports[!!reverse];
fl6->fl6_dport = ports[!reverse];
}
fl6->flowi6_proto = nexthdr;
return;
case IPPROTO_ICMPV6:
if (!onlyproto && (nh + offset + 2 < skb->data ||
pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
u8 *icmp;
nh = skb_network_header(skb);
icmp = (u8 *)(nh + offset);
fl6->fl6_icmp_type = icmp[0];
fl6->fl6_icmp_code = icmp[1];
}
fl6->flowi6_proto = nexthdr;
return;
case IPPROTO_GRE:
if (!onlyproto &&
(nh + offset + 12 < skb->data ||
pskb_may_pull(skb, nh + offset + 12 - skb->data))) {
struct gre_base_hdr *gre_hdr;
__be32 *gre_key;
nh = skb_network_header(skb);
gre_hdr = (struct gre_base_hdr *)(nh + offset);
gre_key = (__be32 *)(gre_hdr + 1);
if (gre_hdr->flags & GRE_KEY) {
if (gre_hdr->flags & GRE_CSUM)
gre_key++;
fl6->fl6_gre_key = *gre_key;
}
}
fl6->flowi6_proto = nexthdr;
return;
#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPPROTO_MH:
offset += ipv6_optlen(exthdr);
if (!onlyproto && (nh + offset + 3 < skb->data ||
pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
struct ip6_mh *mh;
nh = skb_network_header(skb);
mh = (struct ip6_mh *)(nh + offset);
fl6->fl6_mh_type = mh->ip6mh_type;
}
fl6->flowi6_proto = nexthdr;
return;
#endif
default:
fl6->flowi6_proto = nexthdr;
return;
}
if (reverse) {
fl6->saddr = flkeys->addrs.ipv6.dst;
fl6->daddr = flkeys->addrs.ipv6.src;
fl6->fl6_sport = flkeys->ports.dst;
fl6->fl6_dport = flkeys->ports.src;
} else {
fl6->saddr = flkeys->addrs.ipv6.src;
fl6->daddr = flkeys->addrs.ipv6.dst;
fl6->fl6_sport = flkeys->ports.src;
fl6->fl6_dport = flkeys->ports.dst;
}
switch (flkeys->basic.ip_proto) {
case IPPROTO_GRE:
fl6->fl6_gre_key = flkeys->gre.keyid;
break;
case IPPROTO_ICMPV6:
fl6->fl6_icmp_type = flkeys->icmp.type;
fl6->fl6_icmp_code = flkeys->icmp.code;
break;
}
fl6->flowi6_proto = flkeys->basic.ip_proto;
}
#endif
int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
unsigned int family, int reverse)
{
struct xfrm_flow_keys flkeys;
memset(&flkeys, 0, sizeof(flkeys));
__skb_flow_dissect(net, skb, &xfrm_session_dissector, &flkeys,
NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
switch (family) {
case AF_INET:
decode_session4(skb, fl, reverse);
decode_session4(&flkeys, fl, reverse);
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
decode_session6(skb, fl, reverse);
decode_session6(&flkeys, fl, reverse);
break;
#endif
default:
return -EAFNOSUPPORT;
}
fl->flowi_mark = skb->mark;
if (reverse) {
fl->flowi_oif = skb->skb_iif;
} else {
int oif = 0;
if (skb_dst(skb) && skb_dst(skb)->dev)
oif = skb_dst(skb)->dev->ifindex;
fl->flowi_oif = oif;
}
return security_xfrm_decode_session(skb, &fl->flowi_secid);
}
EXPORT_SYMBOL(__xfrm_decode_session);
......@@ -3623,7 +3535,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
reverse = dir & ~XFRM_POLICY_MASK;
dir &= XFRM_POLICY_MASK;
if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
if (__xfrm_decode_session(net, skb, &fl, family, reverse) < 0) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
return 0;
}
......@@ -3779,7 +3691,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
struct dst_entry *dst;
int res = 1;
if (xfrm_decode_session(skb, &fl, family) < 0) {
if (xfrm_decode_session(net, skb, &fl, family) < 0) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
return 0;
}
......@@ -4258,8 +4170,47 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
.exit = xfrm_net_exit,
};
static const struct flow_dissector_key xfrm_flow_dissector_keys[] = {
{
.key_id = FLOW_DISSECTOR_KEY_CONTROL,
.offset = offsetof(struct xfrm_flow_keys, control),
},
{
.key_id = FLOW_DISSECTOR_KEY_BASIC,
.offset = offsetof(struct xfrm_flow_keys, basic),
},
{
.key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
.offset = offsetof(struct xfrm_flow_keys, addrs.ipv4),
},
{
.key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
.offset = offsetof(struct xfrm_flow_keys, addrs.ipv6),
},
{
.key_id = FLOW_DISSECTOR_KEY_PORTS,
.offset = offsetof(struct xfrm_flow_keys, ports),
},
{
.key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
.offset = offsetof(struct xfrm_flow_keys, gre),
},
{
.key_id = FLOW_DISSECTOR_KEY_IP,
.offset = offsetof(struct xfrm_flow_keys, ip),
},
{
.key_id = FLOW_DISSECTOR_KEY_ICMP,
.offset = offsetof(struct xfrm_flow_keys, icmp),
},
};
void __init xfrm_init(void)
{
skb_flow_dissector_init(&xfrm_session_dissector,
xfrm_flow_dissector_keys,
ARRAY_SIZE(xfrm_flow_dissector_keys));
register_pernet_subsys(&xfrm_net_ops);
xfrm_dev_init();
xfrm_input_init();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment