Commit 20da4ef9 authored by David S. Miller's avatar David S. Miller

Merge branch 'ICMP-error-handling-for-UDP-tunnels'

Stefano Brivio says:

====================
ICMP error handling for UDP tunnels

This series introduces ICMP error handling for UDP tunnels and
encapsulations and related selftests. We need to handle ICMP errors to
support PMTU discovery and route redirection -- this support is entirely
missing right now:

- patch 1/11 adds a socket lookup for UDP tunnels that use, by design,
  the same destination port on both endpoints -- i.e. VXLAN and GENEVE
- patches 2/11 to 7/11 are specific to VxLAN and GENEVE
- patches 8/11 and 9/11 add infrastructure for lookup of encapsulations
  where sent packets cannot be matched via receiving socket lookup, i.e.
  FoU and GUE
- patches 10/11 and 11/11 are specific to FoU and GUE

v2: changes are listed in the single patches
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 141b95d5 56fd865f
......@@ -70,6 +70,7 @@ struct geneve_dev {
bool collect_md;
bool use_udp6_rx_checksums;
bool ttl_inherit;
enum ifla_geneve_df df;
};
struct geneve_sock {
......@@ -387,6 +388,57 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
return 0;
}
/* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */
static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb)
{
struct genevehdr *geneveh;
struct geneve_sock *gs;
u8 zero_vni[3] = { 0 };
u8 *vni = zero_vni;
if (skb->len < GENEVE_BASE_HLEN)
return -EINVAL;
geneveh = geneve_hdr(skb);
if (geneveh->ver != GENEVE_VER)
return -EINVAL;
if (geneveh->proto_type != htons(ETH_P_TEB))
return -EINVAL;
gs = rcu_dereference_sk_user_data(sk);
if (!gs)
return -ENOENT;
if (geneve_get_sk_family(gs) == AF_INET) {
struct iphdr *iph = ip_hdr(skb);
__be32 addr4 = 0;
if (!gs->collect_md) {
vni = geneve_hdr(skb)->vni;
addr4 = iph->daddr;
}
return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT;
}
#if IS_ENABLED(CONFIG_IPV6)
if (geneve_get_sk_family(gs) == AF_INET6) {
struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct in6_addr addr6 = { 0 };
if (!gs->collect_md) {
vni = geneve_hdr(skb)->vni;
addr6 = ip6h->daddr;
}
return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT;
}
#endif
return -EPFNOSUPPORT;
}
static struct socket *geneve_create_sock(struct net *net, bool ipv6,
__be16 port, bool ipv6_rx_csum)
{
......@@ -544,6 +596,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
tunnel_cfg.gro_receive = geneve_gro_receive;
tunnel_cfg.gro_complete = geneve_gro_complete;
tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup;
tunnel_cfg.encap_destroy = NULL;
setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
list_add(&gs->list, &gn->sock_list);
......@@ -823,8 +876,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
struct rtable *rt;
struct flowi4 fl4;
__u8 tos, ttl;
__be16 df = 0;
__be16 sport;
__be16 df;
int err;
rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info);
......@@ -838,6 +891,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (geneve->collect_md) {
tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
ttl = key->ttl;
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
} else {
tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb);
if (geneve->ttl_inherit)
......@@ -845,8 +900,22 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
else
ttl = key->ttl;
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
if (geneve->df == GENEVE_DF_SET) {
df = htons(IP_DF);
} else if (geneve->df == GENEVE_DF_INHERIT) {
struct ethhdr *eth = eth_hdr(skb);
if (ntohs(eth->h_proto) == ETH_P_IPV6) {
df = htons(IP_DF);
} else if (ntohs(eth->h_proto) == ETH_P_IP) {
struct iphdr *iph = ip_hdr(skb);
if (iph->frag_off & htons(IP_DF))
df = htons(IP_DF);
}
}
}
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr));
if (unlikely(err))
......@@ -1093,6 +1162,7 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 },
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
[IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 },
[IFLA_GENEVE_DF] = { .type = NLA_U8 },
};
static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
......@@ -1128,6 +1198,16 @@ static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
}
}
if (data[IFLA_GENEVE_DF]) {
enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]);
if (df < 0 || df > GENEVE_DF_MAX) {
NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_GENEVE_DF],
"Invalid DF attribute");
return -EINVAL;
}
}
return 0;
}
......@@ -1173,7 +1253,7 @@ static int geneve_configure(struct net *net, struct net_device *dev,
struct netlink_ext_ack *extack,
const struct ip_tunnel_info *info,
bool metadata, bool ipv6_rx_csum,
bool ttl_inherit)
bool ttl_inherit, enum ifla_geneve_df df)
{
struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_dev *t, *geneve = netdev_priv(dev);
......@@ -1223,6 +1303,7 @@ static int geneve_configure(struct net *net, struct net_device *dev,
geneve->collect_md = metadata;
geneve->use_udp6_rx_checksums = ipv6_rx_csum;
geneve->ttl_inherit = ttl_inherit;
geneve->df = df;
err = register_netdevice(dev);
if (err)
......@@ -1242,7 +1323,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack,
struct ip_tunnel_info *info, bool *metadata,
bool *use_udp6_rx_checksums, bool *ttl_inherit,
bool changelink)
enum ifla_geneve_df *df, bool changelink)
{
int attrtype;
......@@ -1330,6 +1411,9 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
if (data[IFLA_GENEVE_TOS])
info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
if (data[IFLA_GENEVE_DF])
*df = nla_get_u8(data[IFLA_GENEVE_DF]);
if (data[IFLA_GENEVE_LABEL]) {
info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
IPV6_FLOWLABEL_MASK;
......@@ -1448,6 +1532,7 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
enum ifla_geneve_df df = GENEVE_DF_UNSET;
bool use_udp6_rx_checksums = false;
struct ip_tunnel_info info;
bool ttl_inherit = false;
......@@ -1456,12 +1541,12 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
init_tnl_info(&info, GENEVE_UDP_PORT);
err = geneve_nl2info(tb, data, extack, &info, &metadata,
&use_udp6_rx_checksums, &ttl_inherit, false);
&use_udp6_rx_checksums, &ttl_inherit, &df, false);
if (err)
return err;
err = geneve_configure(net, dev, extack, &info, metadata,
use_udp6_rx_checksums, ttl_inherit);
use_udp6_rx_checksums, ttl_inherit, df);
if (err)
return err;
......@@ -1524,6 +1609,7 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_info info;
bool metadata;
bool use_udp6_rx_checksums;
enum ifla_geneve_df df;
bool ttl_inherit;
int err;
......@@ -1539,7 +1625,7 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
use_udp6_rx_checksums = geneve->use_udp6_rx_checksums;
ttl_inherit = geneve->ttl_inherit;
err = geneve_nl2info(tb, data, extack, &info, &metadata,
&use_udp6_rx_checksums, &ttl_inherit, true);
&use_udp6_rx_checksums, &ttl_inherit, &df, true);
if (err)
return err;
......@@ -1572,6 +1658,7 @@ static size_t geneve_get_size(const struct net_device *dev)
nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_DF */
nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */
nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */
nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */
......@@ -1620,6 +1707,9 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label))
goto nla_put_failure;
if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->df))
goto nla_put_failure;
if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst))
goto nla_put_failure;
......@@ -1671,7 +1761,8 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
return dev;
init_tnl_info(&info, dst_port);
err = geneve_configure(net, dev, NULL, &info, true, true, false);
err = geneve_configure(net, dev, NULL, &info,
true, true, false, GENEVE_DF_UNSET);
if (err) {
free_netdev(dev);
return ERR_PTR(err);
......
......@@ -1552,6 +1552,34 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
/* Callback from net/ipv{4,6}/udp.c to check that we have a VNI for errors */
static int vxlan_err_lookup(struct sock *sk, struct sk_buff *skb)
{
struct vxlan_dev *vxlan;
struct vxlan_sock *vs;
struct vxlanhdr *hdr;
__be32 vni;
if (skb->len < VXLAN_HLEN)
return -EINVAL;
hdr = vxlan_hdr(skb);
if (!(hdr->vx_flags & VXLAN_HF_VNI))
return -EINVAL;
vs = rcu_dereference_sk_user_data(sk);
if (!vs)
return -ENOENT;
vni = vxlan_vni(hdr->vx_vni);
vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni);
if (!vxlan)
return -ENOENT;
return 0;
}
static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
......@@ -2250,13 +2278,24 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
/* Bypass encapsulation if the destination is local */
if (!info) {
/* Bypass encapsulation if the destination is local */
err = encap_bypass_if_local(skb, dev, vxlan, dst,
dst_port, ifindex, vni,
&rt->dst, rt->rt_flags);
if (err)
goto out_unlock;
if (vxlan->cfg.df == VXLAN_DF_SET) {
df = htons(IP_DF);
} else if (vxlan->cfg.df == VXLAN_DF_INHERIT) {
struct ethhdr *eth = eth_hdr(skb);
if (ntohs(eth->h_proto) == ETH_P_IPV6 ||
(ntohs(eth->h_proto) == ETH_P_IP &&
old_iph->frag_off & htons(IP_DF)))
df = htons(IP_DF);
}
} else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) {
df = htons(IP_DF);
}
......@@ -2809,6 +2848,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_GPE] = { .type = NLA_FLAG, },
[IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG },
[IFLA_VXLAN_TTL_INHERIT] = { .type = NLA_FLAG },
[IFLA_VXLAN_DF] = { .type = NLA_U8 },
};
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
......@@ -2865,6 +2905,16 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
}
}
if (data[IFLA_VXLAN_DF]) {
enum ifla_vxlan_df df = nla_get_u8(data[IFLA_VXLAN_DF]);
if (df < 0 || df > VXLAN_DF_MAX) {
NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_DF],
"Invalid DF attribute");
return -EINVAL;
}
}
return 0;
}
......@@ -2948,6 +2998,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
tunnel_cfg.sk_user_data = vs;
tunnel_cfg.encap_type = 1;
tunnel_cfg.encap_rcv = vxlan_rcv;
tunnel_cfg.encap_err_lookup = vxlan_err_lookup;
tunnel_cfg.encap_destroy = NULL;
tunnel_cfg.gro_receive = vxlan_gro_receive;
tunnel_cfg.gro_complete = vxlan_gro_complete;
......@@ -3509,6 +3560,9 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
conf->mtu = nla_get_u32(tb[IFLA_MTU]);
}
if (data[IFLA_VXLAN_DF])
conf->df = nla_get_u8(data[IFLA_VXLAN_DF]);
return 0;
}
......@@ -3601,6 +3655,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL_INHERIT */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_DF */
nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */
......@@ -3667,6 +3722,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_VXLAN_TTL_INHERIT,
!!(vxlan->cfg.flags & VXLAN_F_TTL_INHERIT)) ||
nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) ||
nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
nla_put_u8(skb, IFLA_VXLAN_LEARNING,
!!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||
......
......@@ -77,6 +77,7 @@ struct udp_sock {
* For encapsulation sockets.
*/
int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb);
void (*encap_destroy)(struct sock *sk);
/* GRO functions for UDP socket */
......
......@@ -41,7 +41,7 @@ struct net;
void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info);
int icmp_rcv(struct sk_buff *skb);
void icmp_err(struct sk_buff *skb, u32 info);
int icmp_err(struct sk_buff *skb, u32 info);
int icmp_init(void);
void icmp_out_count(struct net *net, unsigned char type);
......
......@@ -69,6 +69,8 @@ struct ip6_tnl_encap_ops {
size_t (*encap_hlen)(struct ip_tunnel_encap *e);
int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
u8 *protocol, struct flowi6 *fl6);
int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info);
};
#ifdef CONFIG_INET
......
......@@ -311,6 +311,7 @@ struct ip_tunnel_encap_ops {
size_t (*encap_hlen)(struct ip_tunnel_encap *e);
int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
u8 *protocol, struct flowi4 *fl4);
int (*err_handler)(struct sk_buff *skb, u32 info);
};
#define MAX_IPTUN_ENCAP_OPS 8
......
......@@ -42,7 +42,10 @@ struct net_protocol {
int (*early_demux)(struct sk_buff *skb);
int (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb);
void (*err_handler)(struct sk_buff *skb, u32 info);
/* This returns an error if we weren't able to handle the error. */
int (*err_handler)(struct sk_buff *skb, u32 info);
unsigned int no_policy:1,
netns_ok:1,
/* does the protocol do more stringent
......@@ -58,10 +61,12 @@ struct inet6_protocol {
void (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb);
void (*err_handler)(struct sk_buff *skb,
/* This returns an error if we weren't able to handle the error. */
int (*err_handler)(struct sk_buff *skb,
struct inet6_skb_parm *opt,
u8 type, u8 code, int offset,
__be32 info);
unsigned int flags; /* INET6_PROTO_xxx */
};
......
......@@ -151,7 +151,7 @@ int sctp_primitive_RECONF(struct net *net, struct sctp_association *asoc,
* sctp/input.c
*/
int sctp_rcv(struct sk_buff *skb);
void sctp_v4_err(struct sk_buff *skb, u32 info);
int sctp_v4_err(struct sk_buff *skb, u32 info);
void sctp_hash_endpoint(struct sctp_endpoint *);
void sctp_unhash_endpoint(struct sctp_endpoint *);
struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *,
......
......@@ -313,7 +313,7 @@ extern struct proto tcp_prot;
void tcp_tasklet_init(void);
void tcp_v4_err(struct sk_buff *skb, u32);
int tcp_v4_err(struct sk_buff *skb, u32);
void tcp_shutdown(struct sock *sk, int how);
......
......@@ -283,7 +283,7 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
int udp_get_port(struct sock *sk, unsigned short snum,
int (*saddr_cmp)(const struct sock *,
const struct sock *));
void udp_err(struct sk_buff *, u32);
int udp_err(struct sk_buff *, u32);
int udp_abort(struct sock *sk, int err);
int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
int udp_push_pending_frames(struct sock *sk);
......
......@@ -64,6 +64,8 @@ static inline int udp_sock_create(struct net *net,
}
typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk,
struct sk_buff *skb);
typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk,
struct list_head *head,
......@@ -76,6 +78,7 @@ struct udp_tunnel_sock_cfg {
/* Used for setting up udp_sock fields, see udp.h for details */
__u8 encap_type;
udp_tunnel_encap_rcv_t encap_rcv;
udp_tunnel_encap_err_lookup_t encap_err_lookup;
udp_tunnel_encap_destroy_t encap_destroy;
udp_tunnel_gro_receive_t gro_receive;
udp_tunnel_gro_complete_t gro_complete;
......
......@@ -216,6 +216,7 @@ struct vxlan_config {
unsigned long age_interval;
unsigned int addrmax;
bool no_share;
enum ifla_vxlan_df df;
};
struct vxlan_dev_node {
......
......@@ -533,6 +533,7 @@ enum {
IFLA_VXLAN_LABEL,
IFLA_VXLAN_GPE,
IFLA_VXLAN_TTL_INHERIT,
IFLA_VXLAN_DF,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
......@@ -542,6 +543,14 @@ struct ifla_vxlan_port_range {
__be16 high;
};
enum ifla_vxlan_df {
VXLAN_DF_UNSET = 0,
VXLAN_DF_SET,
VXLAN_DF_INHERIT,
__VXLAN_DF_END,
VXLAN_DF_MAX = __VXLAN_DF_END - 1,
};
/* GENEVE section */
enum {
IFLA_GENEVE_UNSPEC,
......@@ -557,10 +566,19 @@ enum {
IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
IFLA_GENEVE_LABEL,
IFLA_GENEVE_TTL_INHERIT,
IFLA_GENEVE_DF,
__IFLA_GENEVE_MAX
};
#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)
enum ifla_geneve_df {
GENEVE_DF_UNSET = 0,
GENEVE_DF_SET,
GENEVE_DF_INHERIT,
__GENEVE_DF_END,
GENEVE_DF_MAX = __GENEVE_DF_END - 1,
};
/* PPP section */
enum {
IFLA_PPP_UNSPEC,
......
......@@ -231,7 +231,7 @@ EXPORT_SYMBOL(dccp_req_err);
* check at all. A more general error queue to queue errors for later handling
* is probably better.
*/
static void dccp_v4_err(struct sk_buff *skb, u32 info)
static int dccp_v4_err(struct sk_buff *skb, u32 info)
{
const struct iphdr *iph = (struct iphdr *)skb->data;
const u8 offset = iph->ihl << 2;
......@@ -259,16 +259,18 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
inet_iif(skb), 0);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
return -ENOENT;
}
if (sk->sk_state == DCCP_TIME_WAIT) {
inet_twsk_put(inet_twsk(sk));
return;
return 0;
}
seq = dccp_hdr_seq(dh);
if (sk->sk_state == DCCP_NEW_SYN_RECV)
return dccp_req_err(sk, seq);
if (sk->sk_state == DCCP_NEW_SYN_RECV) {
dccp_req_err(sk, seq);
return 0;
}
bh_lock_sock(sk);
/* If too many ICMPs get dropped on busy
......@@ -357,6 +359,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
out:
bh_unlock_sock(sk);
sock_put(sk);
return 0;
}
static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb,
......
......@@ -68,7 +68,7 @@ static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
}
static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
......@@ -96,16 +96,18 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
return;
return -ENOENT;
}
if (sk->sk_state == DCCP_TIME_WAIT) {
inet_twsk_put(inet_twsk(sk));
return;
return 0;
}
seq = dccp_hdr_seq(dh);
if (sk->sk_state == DCCP_NEW_SYN_RECV)
return dccp_req_err(sk, seq);
if (sk->sk_state == DCCP_NEW_SYN_RECV) {
dccp_req_err(sk, seq);
return 0;
}
bh_lock_sock(sk);
if (sock_owned_by_user(sk))
......@@ -183,6 +185,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
out:
bh_unlock_sock(sk);
sock_put(sk);
return 0;
}
......
......@@ -3,6 +3,7 @@
#include <linux/socket.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/udp.h>
#include <linux/types.h>
#include <linux/kernel.h>
......@@ -1003,15 +1004,82 @@ static int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
return 0;
}
static int gue_err_proto_handler(int proto, struct sk_buff *skb, u32 info)
{
const struct net_protocol *ipprot = rcu_dereference(inet_protos[proto]);
if (ipprot && ipprot->err_handler) {
if (!ipprot->err_handler(skb, info))
return 0;
}
return -ENOENT;
}
static int gue_err(struct sk_buff *skb, u32 info)
{
int transport_offset = skb_transport_offset(skb);
struct guehdr *guehdr;
size_t optlen;
int ret;
if (skb->len < sizeof(struct udphdr) + sizeof(struct guehdr))
return -EINVAL;
guehdr = (struct guehdr *)&udp_hdr(skb)[1];
switch (guehdr->version) {
case 0: /* Full GUE header present */
break;
case 1: {
/* Direct encasulation of IPv4 or IPv6 */
skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));
switch (((struct iphdr *)guehdr)->version) {
case 4:
ret = gue_err_proto_handler(IPPROTO_IPIP, skb, info);
goto out;
#if IS_ENABLED(CONFIG_IPV6)
case 6:
ret = gue_err_proto_handler(IPPROTO_IPV6, skb, info);
goto out;
#endif
default:
ret = -EOPNOTSUPP;
goto out;
}
}
default: /* Undefined version */
return -EOPNOTSUPP;
}
if (guehdr->control)
return -ENOENT;
optlen = guehdr->hlen << 2;
if (validate_gue_flags(guehdr, optlen))
return -EINVAL;
skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));
ret = gue_err_proto_handler(guehdr->proto_ctype, skb, info);
out:
skb_set_transport_header(skb, transport_offset);
return ret;
}
static const struct ip_tunnel_encap_ops fou_iptun_ops = {
.encap_hlen = fou_encap_hlen,
.build_header = fou_build_header,
.err_handler = gue_err,
};
static const struct ip_tunnel_encap_ops gue_iptun_ops = {
.encap_hlen = gue_encap_hlen,
.build_header = gue_build_header,
.err_handler = gue_err,
};
static int ip_tunnel_encap_add_fou_ops(void)
......
......@@ -151,20 +151,25 @@ static int gre_rcv(struct sk_buff *skb)
return NET_RX_DROP;
}
static void gre_err(struct sk_buff *skb, u32 info)
static int gre_err(struct sk_buff *skb, u32 info)
{
const struct gre_protocol *proto;
const struct iphdr *iph = (const struct iphdr *)skb->data;
u8 ver = skb->data[(iph->ihl<<2) + 1]&0x7f;
int err = 0;
if (ver >= GREPROTO_MAX)
return;
return -EINVAL;
rcu_read_lock();
proto = rcu_dereference(gre_proto[ver]);
if (proto && proto->err_handler)
proto->err_handler(skb, info);
else
err = -EPROTONOSUPPORT;
rcu_read_unlock();
return err;
}
static const struct net_protocol net_gre_protocol = {
......
......@@ -1079,7 +1079,7 @@ int icmp_rcv(struct sk_buff *skb)
goto drop;
}
void icmp_err(struct sk_buff *skb, u32 info)
int icmp_err(struct sk_buff *skb, u32 info)
{
struct iphdr *iph = (struct iphdr *)skb->data;
int offset = iph->ihl<<2;
......@@ -1094,13 +1094,15 @@ void icmp_err(struct sk_buff *skb, u32 info)
*/
if (icmph->type != ICMP_ECHOREPLY) {
ping_err(skb, offset, info);
return;
return 0;
}
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
ipv4_update_pmtu(skb, net, info, 0, IPPROTO_ICMP);
else if (type == ICMP_REDIRECT)
ipv4_redirect(skb, net, 0, IPPROTO_ICMP);
return 0;
}
/*
......
......@@ -121,7 +121,7 @@ static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;
static void ipgre_err(struct sk_buff *skb, u32 info,
static int ipgre_err(struct sk_buff *skb, u32 info,
const struct tnl_ptk_info *tpi)
{
......@@ -146,17 +146,32 @@ static void ipgre_err(struct sk_buff *skb, u32 info,
unsigned int data_len = 0;
struct ip_tunnel *t;
if (tpi->proto == htons(ETH_P_TEB))
itn = net_generic(net, gre_tap_net_id);
else if (tpi->proto == htons(ETH_P_ERSPAN) ||
tpi->proto == htons(ETH_P_ERSPAN2))
itn = net_generic(net, erspan_net_id);
else
itn = net_generic(net, ipgre_net_id);
iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
iph->daddr, iph->saddr, tpi->key);
if (!t)
return -ENOENT;
switch (type) {
default:
case ICMP_PARAMETERPROB:
return;
return 0;
case ICMP_DEST_UNREACH:
switch (code) {
case ICMP_SR_FAILED:
case ICMP_PORT_UNREACH:
/* Impossible event. */
return;
return 0;
default:
/* All others are translated to HOST_UNREACH.
rfc2003 contains "deep thoughts" about NET_UNREACH,
......@@ -168,7 +183,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info,
case ICMP_TIME_EXCEEDED:
if (code != ICMP_EXC_TTL)
return;
return 0;
data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
break;
......@@ -176,40 +191,27 @@ static void ipgre_err(struct sk_buff *skb, u32 info,
break;
}
if (tpi->proto == htons(ETH_P_TEB))
itn = net_generic(net, gre_tap_net_id);
else if (tpi->proto == htons(ETH_P_ERSPAN) ||
tpi->proto == htons(ETH_P_ERSPAN2))
itn = net_generic(net, erspan_net_id);
else
itn = net_generic(net, ipgre_net_id);
iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
iph->daddr, iph->saddr, tpi->key);
if (!t)
return;
#if IS_ENABLED(CONFIG_IPV6)
if (tpi->proto == htons(ETH_P_IPV6) &&
!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
type, data_len))
return;
return 0;
#endif
if (t->parms.iph.daddr == 0 ||
ipv4_is_multicast(t->parms.iph.daddr))
return;
return 0;
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
return;
return 0;
if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
t->err_count++;
else
t->err_count = 1;
t->err_time = jiffies;
return 0;
}
static void gre_err(struct sk_buff *skb, u32 info)
......
......@@ -140,6 +140,13 @@ static int ipip_err(struct sk_buff *skb, u32 info)
struct ip_tunnel *t;
int err = 0;
t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
iph->daddr, iph->saddr, 0);
if (!t) {
err = -ENOENT;
goto out;
}
switch (type) {
case ICMP_DEST_UNREACH:
switch (code) {
......@@ -167,13 +174,6 @@ static int ipip_err(struct sk_buff *skb, u32 info)
goto out;
}
t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
iph->daddr, iph->saddr, 0);
if (!t) {
err = -ENOENT;
goto out;
}
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, net, info, t->parms.link, iph->protocol);
goto out;
......
......@@ -29,6 +29,7 @@
#include <net/protocol.h>
struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
EXPORT_SYMBOL(inet_protos);
const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
EXPORT_SYMBOL(inet_offloads);
......
......@@ -423,7 +423,7 @@ EXPORT_SYMBOL(tcp_req_err);
*
*/
void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
......@@ -446,20 +446,21 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
inet_iif(icmp_skb), 0);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
return -ENOENT;
}
if (sk->sk_state == TCP_TIME_WAIT) {
inet_twsk_put(inet_twsk(sk));
return;
return 0;
}
seq = ntohl(th->seq);
if (sk->sk_state == TCP_NEW_SYN_RECV)
return tcp_req_err(sk, seq,
type == ICMP_PARAMETERPROB ||
if (sk->sk_state == TCP_NEW_SYN_RECV) {
tcp_req_err(sk, seq, type == ICMP_PARAMETERPROB ||
type == ICMP_TIME_EXCEEDED ||
(type == ICMP_DEST_UNREACH &&
(code == ICMP_NET_UNREACH ||
code == ICMP_HOST_UNREACH)));
return 0;
}
bh_lock_sock(sk);
/* If too many ICMPs get dropped on busy
......@@ -613,6 +614,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
out:
bh_unlock_sock(sk);
sock_put(sk);
return 0;
}
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
......
......@@ -149,34 +149,40 @@ static int tunnelmpls4_rcv(struct sk_buff *skb)
}
#endif
static void tunnel4_err(struct sk_buff *skb, u32 info)
static int tunnel4_err(struct sk_buff *skb, u32 info)
{
struct xfrm_tunnel *handler;
for_each_tunnel_rcu(tunnel4_handlers, handler)
if (!handler->err_handler(skb, info))
break;
return 0;
return -ENOENT;
}
#if IS_ENABLED(CONFIG_IPV6)
static void tunnel64_err(struct sk_buff *skb, u32 info)
static int tunnel64_err(struct sk_buff *skb, u32 info)
{
struct xfrm_tunnel *handler;
for_each_tunnel_rcu(tunnel64_handlers, handler)
if (!handler->err_handler(skb, info))
break;
return 0;
return -ENOENT;
}
#endif
#if IS_ENABLED(CONFIG_MPLS)
static void tunnelmpls4_err(struct sk_buff *skb, u32 info)
static int tunnelmpls4_err(struct sk_buff *skb, u32 info)
{
struct xfrm_tunnel *handler;
for_each_tunnel_rcu(tunnelmpls4_handlers, handler)
if (!handler->err_handler(skb, info))
break;
return 0;
return -ENOENT;
}
#endif
......
......@@ -105,6 +105,7 @@
#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/ip_tunnels.h>
#include <net/route.h>
#include <net/checksum.h>
#include <net/xfrm.h>
......@@ -583,6 +584,89 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
return true;
}
DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
void udp_encap_enable(void)
{
static_branch_enable(&udp_encap_needed_key);
}
EXPORT_SYMBOL(udp_encap_enable);
/* Handler for tunnels with arbitrary destination ports: no socket lookup, go
* through error handlers in encapsulations looking for a match.
*/
static int __udp4_lib_err_encap_no_sk(struct sk_buff *skb, u32 info)
{
int i;
for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
int (*handler)(struct sk_buff *skb, u32 info);
if (!iptun_encaps[i])
continue;
handler = rcu_dereference(iptun_encaps[i]->err_handler);
if (handler && !handler(skb, info))
return 0;
}
return -ENOENT;
}
/* Try to match ICMP errors to UDP tunnels by looking up a socket without
* reversing source and destination port: this will match tunnels that force the
* same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
* lwtunnels might actually break this assumption by being configured with
* different destination ports on endpoints, in this case we won't be able to
* trace ICMP messages back to them.
*
* If this doesn't match any socket, probe tunnels with arbitrary destination
* ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port
* we've sent packets to won't necessarily match the local destination port.
*
* Then ask the tunnel implementation to match the error against a valid
* association.
*
* Return an error if we can't find a match, the socket if we need further
* processing, zero otherwise.
*/
static struct sock *__udp4_lib_err_encap(struct net *net,
const struct iphdr *iph,
struct udphdr *uh,
struct udp_table *udptable,
struct sk_buff *skb, u32 info)
{
int network_offset, transport_offset;
struct sock *sk;
network_offset = skb_network_offset(skb);
transport_offset = skb_transport_offset(skb);
/* Network header needs to point to the outer IPv4 header inside ICMP */
skb_reset_network_header(skb);
/* Transport header needs to point to the UDP header */
skb_set_transport_header(skb, iph->ihl << 2);
sk = __udp4_lib_lookup(net, iph->daddr, uh->source,
iph->saddr, uh->dest, skb->dev->ifindex, 0,
udptable, NULL);
if (sk) {
int (*lookup)(struct sock *sk, struct sk_buff *skb);
struct udp_sock *up = udp_sk(sk);
lookup = READ_ONCE(up->encap_err_lookup);
if (!lookup || lookup(sk, skb))
sk = NULL;
}
if (!sk)
sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info));
skb_set_transport_header(skb, transport_offset);
skb_set_network_header(skb, network_offset);
return sk;
}
/*
* This routine is called by the ICMP module when it gets some
* sort of error condition. If err < 0 then the socket should
......@@ -594,13 +678,14 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
* to find the appropriate port.
*/
void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
{
struct inet_sock *inet;
const struct iphdr *iph = (const struct iphdr *)skb->data;
struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
bool tunnel = false;
struct sock *sk;
int harderr;
int err;
......@@ -610,8 +695,21 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
iph->saddr, uh->source, skb->dev->ifindex,
inet_sdif(skb), udptable, NULL);
if (!sk) {
/* No socket for error: try tunnels before discarding */
sk = ERR_PTR(-ENOENT);
if (static_branch_unlikely(&udp_encap_needed_key)) {
sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb,
info);
if (!sk)
return 0;
}
if (IS_ERR(sk)) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return; /* No socket for error */
return PTR_ERR(sk);
}
tunnel = true;
}
err = 0;
......@@ -654,6 +752,10 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
* RFC1122: OK. Passes ICMP errors back to application, as per
* 4.1.3.3.
*/
if (tunnel) {
/* ...not for tunnels though: we don't have a sending socket */
goto out;
}
if (!inet->recverr) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
......@@ -663,12 +765,12 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
sk->sk_err = err;
sk->sk_error_report(sk);
out:
return;
return 0;
}
void udp_err(struct sk_buff *skb, u32 info)
int udp_err(struct sk_buff *skb, u32 info)
{
__udp4_lib_err(skb, info, &udp_table);
return __udp4_lib_err(skb, info, &udp_table);
}
/*
......@@ -1891,13 +1993,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
return 0;
}
DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
void udp_encap_enable(void)
{
static_branch_enable(&udp_encap_needed_key);
}
EXPORT_SYMBOL(udp_encap_enable);
/* returns:
* -1: error
* 0: success
......
......@@ -7,7 +7,7 @@
#include <net/inet_common.h>
int __udp4_lib_rcv(struct sk_buff *, struct udp_table *, int);
void __udp4_lib_err(struct sk_buff *, u32, struct udp_table *);
int __udp4_lib_err(struct sk_buff *, u32, struct udp_table *);
int udp_v4_get_port(struct sock *sk, unsigned short snum);
......
......@@ -68,6 +68,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
udp_sk(sk)->encap_type = cfg->encap_type;
udp_sk(sk)->encap_rcv = cfg->encap_rcv;
udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup;
udp_sk(sk)->encap_destroy = cfg->encap_destroy;
udp_sk(sk)->gro_receive = cfg->gro_receive;
udp_sk(sk)->gro_complete = cfg->gro_complete;
......
......@@ -25,9 +25,9 @@ static int udplite_rcv(struct sk_buff *skb)
return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
}
static void udplite_err(struct sk_buff *skb, u32 info)
static int udplite_err(struct sk_buff *skb, u32 info)
{
__udp4_lib_err(skb, info, &udplite_table);
return __udp4_lib_err(skb, info, &udplite_table);
}
static const struct net_protocol udplite_protocol = {
......
......@@ -106,13 +106,15 @@ static int xfrm4_esp_rcv(struct sk_buff *skb)
return 0;
}
static void xfrm4_esp_err(struct sk_buff *skb, u32 info)
static int xfrm4_esp_err(struct sk_buff *skb, u32 info)
{
struct xfrm4_protocol *handler;
for_each_protocol_rcu(esp4_handlers, handler)
if (!handler->err_handler(skb, info))
break;
return 0;
return -ENOENT;
}
static int xfrm4_ah_rcv(struct sk_buff *skb)
......@@ -132,13 +134,15 @@ static int xfrm4_ah_rcv(struct sk_buff *skb)
return 0;
}
static void xfrm4_ah_err(struct sk_buff *skb, u32 info)
static int xfrm4_ah_err(struct sk_buff *skb, u32 info)
{
struct xfrm4_protocol *handler;
for_each_protocol_rcu(ah4_handlers, handler)
if (!handler->err_handler(skb, info))
break;
return 0;
return -ENOENT;
}
static int xfrm4_ipcomp_rcv(struct sk_buff *skb)
......@@ -158,13 +162,15 @@ static int xfrm4_ipcomp_rcv(struct sk_buff *skb)
return 0;
}
static void xfrm4_ipcomp_err(struct sk_buff *skb, u32 info)
static int xfrm4_ipcomp_err(struct sk_buff *skb, u32 info)
{
struct xfrm4_protocol *handler;
for_each_protocol_rcu(ipcomp4_handlers, handler)
if (!handler->err_handler(skb, info))
break;
return 0;
return -ENOENT;
}
static const struct net_protocol esp4_protocol = {
......
......@@ -4,6 +4,7 @@
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/icmpv6.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <net/fou.h>
......@@ -69,14 +70,87 @@ static int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
return 0;
}
static int gue6_err_proto_handler(int proto, struct sk_buff *skb,
struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, u32 info)
{
const struct inet6_protocol *ipprot;
ipprot = rcu_dereference(inet6_protos[proto]);
if (ipprot && ipprot->err_handler) {
if (!ipprot->err_handler(skb, opt, type, code, offset, info))
return 0;
}
return -ENOENT;
}
static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
int transport_offset = skb_transport_offset(skb);
struct guehdr *guehdr;
size_t optlen;
int ret;
if (skb->len < sizeof(struct udphdr) + sizeof(struct guehdr))
return -EINVAL;
guehdr = (struct guehdr *)&udp_hdr(skb)[1];
switch (guehdr->version) {
case 0: /* Full GUE header present */
break;
case 1: {
/* Direct encasulation of IPv4 or IPv6 */
skb_set_transport_header(skb, -(int)sizeof(struct icmp6hdr));
switch (((struct iphdr *)guehdr)->version) {
case 4:
ret = gue6_err_proto_handler(IPPROTO_IPIP, skb, opt,
type, code, offset, info);
goto out;
case 6:
ret = gue6_err_proto_handler(IPPROTO_IPV6, skb, opt,
type, code, offset, info);
goto out;
default:
ret = -EOPNOTSUPP;
goto out;
}
}
default: /* Undefined version */
return -EOPNOTSUPP;
}
if (guehdr->control)
return -ENOENT;
optlen = guehdr->hlen << 2;
if (validate_gue_flags(guehdr, optlen))
return -EINVAL;
skb_set_transport_header(skb, -(int)sizeof(struct icmp6hdr));
ret = gue6_err_proto_handler(guehdr->proto_ctype, skb,
opt, type, code, offset, info);
out:
skb_set_transport_header(skb, transport_offset);
return ret;
}
static const struct ip6_tnl_encap_ops fou_ip6tun_ops = {
.encap_hlen = fou_encap_hlen,
.build_header = fou6_build_header,
.err_handler = gue6_err,
};
static const struct ip6_tnl_encap_ops gue_ip6tun_ops = {
.encap_hlen = gue_encap_hlen,
.build_header = gue6_build_header,
.err_handler = gue6_err,
};
static int ip6_tnl_encap_add_fou_ops(void)
......
......@@ -84,7 +84,7 @@ static inline struct sock *icmpv6_sk(struct net *net)
return net->ipv6.icmp_sk[smp_processor_id()];
}
static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
......@@ -100,6 +100,8 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!(type & ICMPV6_INFOMSG_MASK))
if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
ping_err(skb, offset, ntohl(info));
return 0;
}
static int icmpv6_rcv(struct sk_buff *skb);
......
......@@ -423,7 +423,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
}
static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
......@@ -433,13 +433,13 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IPV6),
offset) < 0)
return;
return -EINVAL;
ipv6h = (const struct ipv6hdr *)skb->data;
t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
tpi.key, tpi.proto);
if (!t)
return;
return -ENOENT;
switch (type) {
struct ipv6_tlv_tnl_enc_lim *tel;
......@@ -449,14 +449,14 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
t->parms.name);
if (code != ICMPV6_PORT_UNREACH)
break;
return;
return 0;
case ICMPV6_TIME_EXCEED:
if (code == ICMPV6_EXC_HOPLIMIT) {
net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
t->parms.name);
break;
}
return;
return 0;
case ICMPV6_PARAMPROB:
teli = 0;
if (code == ICMPV6_HDR_FIELD)
......@@ -472,14 +472,14 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
t->parms.name);
}
return;
return 0;
case ICMPV6_PKT_TOOBIG:
ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
return;
return 0;
case NDISC_REDIRECT:
ip6_redirect(skb, net, skb->dev->ifindex, 0,
sock_net_uid(net, NULL));
return;
return 0;
}
if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
......@@ -487,6 +487,8 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
else
t->err_count = 1;
t->err_time = jiffies;
return 0;
}
static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
......
......@@ -349,7 +349,7 @@ static void tcp_v6_mtu_reduced(struct sock *sk)
}
}
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
......@@ -371,17 +371,19 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
return;
return -ENOENT;
}
if (sk->sk_state == TCP_TIME_WAIT) {
inet_twsk_put(inet_twsk(sk));
return;
return 0;
}
seq = ntohl(th->seq);
fatal = icmpv6_err_convert(type, code, &err);
if (sk->sk_state == TCP_NEW_SYN_RECV)
return tcp_req_err(sk, seq, fatal);
if (sk->sk_state == TCP_NEW_SYN_RECV) {
tcp_req_err(sk, seq, fatal);
return 0;
}
bh_lock_sock(sk);
if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
......@@ -467,6 +469,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
out:
bh_unlock_sock(sk);
sock_put(sk);
return 0;
}
......
......@@ -134,24 +134,28 @@ static int tunnel46_rcv(struct sk_buff *skb)
return 0;
}
static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
static int tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct xfrm6_tunnel *handler;
for_each_tunnel_rcu(tunnel6_handlers, handler)
if (!handler->err_handler(skb, opt, type, code, offset, info))
break;
return 0;
return -ENOENT;
}
static void tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
static int tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct xfrm6_tunnel *handler;
for_each_tunnel_rcu(tunnel46_handlers, handler)
if (!handler->err_handler(skb, opt, type, code, offset, info))
break;
return 0;
return -ENOENT;
}
static const struct inet6_protocol tunnel6_protocol = {
......
......@@ -45,6 +45,7 @@
#include <net/raw.h>
#include <net/tcp_states.h>
#include <net/ip6_checksum.h>
#include <net/ip6_tunnel.h>
#include <net/xfrm.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
......@@ -462,7 +463,97 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
goto try_again;
}
void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
void udpv6_encap_enable(void)
{
static_branch_enable(&udpv6_encap_needed_key);
}
EXPORT_SYMBOL(udpv6_encap_enable);
/* Handler for tunnels with arbitrary destination ports: no socket lookup, go
* through error handlers in encapsulations looking for a match.
*/
static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb,
struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, u32 info)
{
int i;
for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
int (*handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, u32 info);
if (!ip6tun_encaps[i])
continue;
handler = rcu_dereference(ip6tun_encaps[i]->err_handler);
if (handler && !handler(skb, opt, type, code, offset, info))
return 0;
}
return -ENOENT;
}
/* Try to match ICMP errors to UDP tunnels by looking up a socket without
* reversing source and destination port: this will match tunnels that force the
* same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
* lwtunnels might actually break this assumption by being configured with
* different destination ports on endpoints, in this case we won't be able to
* trace ICMP messages back to them.
*
* If this doesn't match any socket, probe tunnels with arbitrary destination
* ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port
* we've sent packets to won't necessarily match the local destination port.
*
* Then ask the tunnel implementation to match the error against a valid
* association.
*
* Return an error if we can't find a match, the socket if we need further
* processing, zero otherwise.
*/
static struct sock *__udp6_lib_err_encap(struct net *net,
const struct ipv6hdr *hdr, int offset,
struct udphdr *uh,
struct udp_table *udptable,
struct sk_buff *skb,
struct inet6_skb_parm *opt,
u8 type, u8 code, __be32 info)
{
int network_offset, transport_offset;
struct sock *sk;
network_offset = skb_network_offset(skb);
transport_offset = skb_transport_offset(skb);
/* Network header needs to point to the outer IPv6 header inside ICMP */
skb_reset_network_header(skb);
/* Transport header needs to point to the UDP header */
skb_set_transport_header(skb, offset);
sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source,
&hdr->saddr, uh->dest,
inet6_iif(skb), 0, udptable, skb);
if (sk) {
int (*lookup)(struct sock *sk, struct sk_buff *skb);
struct udp_sock *up = udp_sk(sk);
lookup = READ_ONCE(up->encap_err_lookup);
if (!lookup || lookup(sk, skb))
sk = NULL;
}
if (!sk) {
sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code,
offset, info));
}
skb_set_transport_header(skb, transport_offset);
skb_set_network_header(skb, network_offset);
return sk;
}
int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info,
struct udp_table *udptable)
{
......@@ -471,6 +562,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
const struct in6_addr *saddr = &hdr->saddr;
const struct in6_addr *daddr = &hdr->daddr;
struct udphdr *uh = (struct udphdr *)(skb->data+offset);
bool tunnel = false;
struct sock *sk;
int harderr;
int err;
......@@ -479,9 +571,23 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
inet6_iif(skb), inet6_sdif(skb), udptable, skb);
if (!sk) {
/* No socket for error: try tunnels before discarding */
sk = ERR_PTR(-ENOENT);
if (static_branch_unlikely(&udpv6_encap_needed_key)) {
sk = __udp6_lib_err_encap(net, hdr, offset, uh,
udptable, skb,
opt, type, code, info);
if (!sk)
return 0;
}
if (IS_ERR(sk)) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
return;
return PTR_ERR(sk);
}
tunnel = true;
}
harderr = icmpv6_err_convert(type, code, &err);
......@@ -495,10 +601,19 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
harderr = 1;
}
if (type == NDISC_REDIRECT) {
if (tunnel) {
ip6_redirect(skb, sock_net(sk), inet6_iif(skb),
sk->sk_mark, sk->sk_uid);
} else {
ip6_sk_redirect(skb, sk);
}
goto out;
}
/* Tunnels don't have an application socket: don't pass errors back */
if (tunnel)
goto out;
if (!np->recverr) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
......@@ -509,7 +624,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk->sk_err = err;
sk->sk_error_report(sk);
out:
return;
return 0;
}
static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
......@@ -540,20 +655,13 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
return 0;
}
static __inline__ void udpv6_err(struct sk_buff *skb,
static __inline__ int udpv6_err(struct sk_buff *skb,
struct inet6_skb_parm *opt, u8 type,
u8 code, int offset, __be32 info)
{
__udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
return __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
}
DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
void udpv6_encap_enable(void)
{
static_branch_enable(&udpv6_encap_needed_key);
}
EXPORT_SYMBOL(udpv6_encap_enable);
static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
{
struct udp_sock *up = udp_sk(sk);
......
......@@ -9,7 +9,7 @@
#include <net/transp_v6.h>
int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int);
void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int,
int __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int,
__be32, struct udp_table *);
int udp_v6_get_port(struct sock *sk, unsigned short snum);
......
......@@ -20,11 +20,12 @@ static int udplitev6_rcv(struct sk_buff *skb)
return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
}
static void udplitev6_err(struct sk_buff *skb,
static int udplitev6_err(struct sk_buff *skb,
struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
__udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table);
return __udp6_lib_err(skb, opt, type, code, offset, info,
&udplite_table);
}
static const struct inet6_protocol udplitev6_protocol = {
......
......@@ -80,14 +80,16 @@ static int xfrm6_esp_rcv(struct sk_buff *skb)
return 0;
}
static void xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
static int xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct xfrm6_protocol *handler;
for_each_protocol_rcu(esp6_handlers, handler)
if (!handler->err_handler(skb, opt, type, code, offset, info))
break;
return 0;
return -ENOENT;
}
static int xfrm6_ah_rcv(struct sk_buff *skb)
......@@ -107,14 +109,16 @@ static int xfrm6_ah_rcv(struct sk_buff *skb)
return 0;
}
static void xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
static int xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct xfrm6_protocol *handler;
for_each_protocol_rcu(ah6_handlers, handler)
if (!handler->err_handler(skb, opt, type, code, offset, info))
break;
return 0;
return -ENOENT;
}
static int xfrm6_ipcomp_rcv(struct sk_buff *skb)
......@@ -134,14 +138,16 @@ static int xfrm6_ipcomp_rcv(struct sk_buff *skb)
return 0;
}
static void xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
static int xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct xfrm6_protocol *handler;
for_each_protocol_rcu(ipcomp6_handlers, handler)
if (!handler->err_handler(skb, opt, type, code, offset, info))
break;
return 0;
return -ENOENT;
}
static const struct inet6_protocol esp6_protocol = {
......
......@@ -574,7 +574,7 @@ void sctp_err_finish(struct sock *sk, struct sctp_transport *t)
* is probably better.
*
*/
void sctp_v4_err(struct sk_buff *skb, __u32 info)
int sctp_v4_err(struct sk_buff *skb, __u32 info)
{
const struct iphdr *iph = (const struct iphdr *)skb->data;
const int ihlen = iph->ihl * 4;
......@@ -599,7 +599,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
skb->transport_header = savesctp;
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
return -ENOENT;
}
/* Warning: The sock lock is held. Remember to call
* sctp_err_finish!
......@@ -653,6 +653,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
out_unlock:
sctp_err_finish(sk, transport);
return 0;
}
/*
......
......@@ -138,7 +138,7 @@ static struct notifier_block sctp_inet6addr_notifier = {
};
/* ICMP error handler. */
static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct inet6_dev *idev;
......@@ -147,7 +147,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct sctp_transport *transport;
struct ipv6_pinfo *np;
__u16 saveip, savesctp;
int err;
int err, ret = 0;
struct net *net = dev_net(skb->dev);
idev = in6_dev_get(skb->dev);
......@@ -163,6 +163,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
skb->transport_header = savesctp;
if (!sk) {
__ICMP6_INC_STATS(net, idev, ICMP6_MIB_INERRORS);
ret = -ENOENT;
goto out;
}
......@@ -202,6 +203,8 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
out:
if (likely(idev != NULL))
in6_dev_put(idev);
return ret;
}
static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment