Commit 3ddaed6b authored by David S. Miller

Merge branch 'pmtu-esp'

Vadim Fedorenko says:

====================
Fix PMTU for ESP-in-UDP encapsulation

Bug 213669 uncovered a regression in PMTU discovery for UDP-encapsulated
routes and some incorrect usage of UDP tunnel fields. This series fixes
these problems and also adds this case to the selftests.

v3:
 - update checking logic to account for the SCTP use case
v2:
 - remove the refactoring code that was in the first patch
 - move checking logic to __udp{4,6}_lib_err_encap
 - add more tests, especially routed configuration
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 58acd100 ece1278a
......@@ -645,10 +645,12 @@ static struct sock *__udp4_lib_err_encap(struct net *net,
const struct iphdr *iph,
struct udphdr *uh,
struct udp_table *udptable,
struct sock *sk,
struct sk_buff *skb, u32 info)
{
int (*lookup)(struct sock *sk, struct sk_buff *skb);
int network_offset, transport_offset;
struct sock *sk;
struct udp_sock *up;
network_offset = skb_network_offset(skb);
transport_offset = skb_transport_offset(skb);
......@@ -659,18 +661,28 @@ static struct sock *__udp4_lib_err_encap(struct net *net,
/* Transport header needs to point to the UDP header */
skb_set_transport_header(skb, iph->ihl << 2);
if (sk) {
up = udp_sk(sk);
lookup = READ_ONCE(up->encap_err_lookup);
if (lookup && lookup(sk, skb))
sk = NULL;
goto out;
}
sk = __udp4_lib_lookup(net, iph->daddr, uh->source,
iph->saddr, uh->dest, skb->dev->ifindex, 0,
udptable, NULL);
if (sk) {
int (*lookup)(struct sock *sk, struct sk_buff *skb);
struct udp_sock *up = udp_sk(sk);
up = udp_sk(sk);
lookup = READ_ONCE(up->encap_err_lookup);
if (!lookup || lookup(sk, skb))
sk = NULL;
}
out:
if (!sk)
sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info));
......@@ -707,15 +719,16 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
iph->saddr, uh->source, skb->dev->ifindex,
inet_sdif(skb), udptable, NULL);
if (!sk || udp_sk(sk)->encap_type) {
/* No socket for error: try tunnels before discarding */
sk = ERR_PTR(-ENOENT);
if (static_branch_unlikely(&udp_encap_needed_key)) {
sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb,
sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb,
info);
if (!sk)
return 0;
}
} else
sk = ERR_PTR(-ENOENT);
if (IS_ERR(sk)) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
......
......@@ -502,12 +502,14 @@ static struct sock *__udp6_lib_err_encap(struct net *net,
const struct ipv6hdr *hdr, int offset,
struct udphdr *uh,
struct udp_table *udptable,
struct sock *sk,
struct sk_buff *skb,
struct inet6_skb_parm *opt,
u8 type, u8 code, __be32 info)
{
int (*lookup)(struct sock *sk, struct sk_buff *skb);
int network_offset, transport_offset;
struct sock *sk;
struct udp_sock *up;
network_offset = skb_network_offset(skb);
transport_offset = skb_transport_offset(skb);
......@@ -518,18 +520,28 @@ static struct sock *__udp6_lib_err_encap(struct net *net,
/* Transport header needs to point to the UDP header */
skb_set_transport_header(skb, offset);
if (sk) {
up = udp_sk(sk);
lookup = READ_ONCE(up->encap_err_lookup);
if (lookup && lookup(sk, skb))
sk = NULL;
goto out;
}
sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source,
&hdr->saddr, uh->dest,
inet6_iif(skb), 0, udptable, skb);
if (sk) {
int (*lookup)(struct sock *sk, struct sk_buff *skb);
struct udp_sock *up = udp_sk(sk);
up = udp_sk(sk);
lookup = READ_ONCE(up->encap_err_lookup);
if (!lookup || lookup(sk, skb))
sk = NULL;
}
out:
if (!sk) {
sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code,
offset, info));
......@@ -558,16 +570,17 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
inet6_iif(skb), inet6_sdif(skb), udptable, NULL);
if (!sk || udp_sk(sk)->encap_type) {
/* No socket for error: try tunnels before discarding */
sk = ERR_PTR(-ENOENT);
if (static_branch_unlikely(&udpv6_encap_needed_key)) {
sk = __udp6_lib_err_encap(net, hdr, offset, uh,
udptable, skb,
udptable, sk, skb,
opt, type, code, info);
if (!sk)
return 0;
}
} else
sk = ERR_PTR(-ENOENT);
if (IS_ERR(sk)) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
......
......@@ -11,9 +11,11 @@
#include <sys/socket.h>
#include <sys/wait.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netdb.h>
#include <fcntl.h>
#include <libgen.h>
......@@ -27,6 +29,10 @@
#include <time.h>
#include <errno.h>
#include <linux/xfrm.h>
#include <linux/ipsec.h>
#include <linux/pfkeyv2.h>
#ifndef IPV6_UNICAST_IF
#define IPV6_UNICAST_IF 76
#endif
......@@ -114,6 +120,9 @@ struct sock_args {
struct in_addr in;
struct in6_addr in6;
} expected_raddr;
/* ESP in UDP encap test */
int use_xfrm;
};
static int server_mode;
......@@ -1346,6 +1355,41 @@ static int bind_socket(int sd, struct sock_args *args)
return 0;
}
/*
 * Configure a UDP socket for the ESP-in-UDP encapsulation test.
 *
 * Attaches a per-socket XFRM "allow" policy for the outbound and then the
 * inbound direction (selector family taken from args->version, all other
 * policy fields left zeroed), and finally sets the UDP_ENCAP socket option
 * to UDP_ENCAP_ESPINUDP.
 *
 * Only SOCK_DGRAM sockets are accepted; any other type is rejected before
 * a socket option is touched.
 *
 * Returns 0 on success, 1 on any failure. A failing policy setsockopt() is
 * not logged here; only the socket-type check and the UDP_ENCAP failure
 * emit a message.
 */
static int config_xfrm_policy(int sd, struct sock_args *args)
{
	static const int directions[] = { XFRM_POLICY_OUT, XFRM_POLICY_IN };
	struct xfrm_userpolicy_info pol = {};
	int encap = UDP_ENCAP_ESPINUDP;
	int sol, optname;
	unsigned int i;

	if (args->type != SOCK_DGRAM) {
		log_error("Invalid socket type. Only DGRAM could be used for XFRM\n");
		return 1;
	}

	/* Pick the option level/name matching the address family. */
	if (args->version == AF_INET6) {
		sol = SOL_IPV6;
		optname = IPV6_XFRM_POLICY;
	} else {
		sol = SOL_IP;
		optname = IP_XFRM_POLICY;
	}

	pol.action = XFRM_POLICY_ALLOW;
	pol.sel.family = args->version;

	/* Install the same policy for both directions, outbound first. */
	for (i = 0; i < sizeof(directions) / sizeof(directions[0]); i++) {
		pol.dir = directions[i];
		if (setsockopt(sd, sol, optname, &pol, sizeof(pol)) < 0)
			return 1;
	}

	if (setsockopt(sd, IPPROTO_UDP, UDP_ENCAP, &encap, sizeof(encap)) >= 0)
		return 0;

	log_err_errno("Failed to set xfrm encap");
	return 1;
}
static int lsock_init(struct sock_args *args)
{
long flags;
......@@ -1389,6 +1433,11 @@ static int lsock_init(struct sock_args *args)
if (fcntl(sd, F_SETFD, FD_CLOEXEC) < 0)
log_err_errno("Failed to set close-on-exec flag");
if (args->use_xfrm && config_xfrm_policy(sd, args)) {
log_err_errno("Failed to set xfrm policy");
goto err;
}
out:
return sd;
......@@ -1772,7 +1821,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
return client_status;
}
#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6L:0:1:2:3:Fbq"
#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbq"
static void print_usage(char *prog)
{
......@@ -1795,6 +1844,7 @@ static void print_usage(char *prog)
" -D|R datagram (D) / raw (R) socket (default stream)\n"
" -l addr local address to bind to in server mode\n"
" -c addr local address to bind to in client mode\n"
" -x configure XFRM policy on socket\n"
"\n"
" -d dev bind socket to given device name\n"
" -I dev bind socket to given device name - server mode\n"
......@@ -1966,6 +2016,9 @@ int main(int argc, char *argv[])
case 'q':
quiet = 1;
break;
case 'x':
args.use_xfrm = 1;
break;
default:
print_usage(argv[0]);
return 1;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment