Commit 212ee4b5 authored by David S. Miller

Merge branch 'rfc8335-probe'

Andreas Roeseler says:

====================
add support for RFC 8335 PROBE

The popular utility ping has several severe limitations, such as the
inability to query specific interfaces on a node and requiring
bidirectional connectivity between the probing and probed interfaces.
RFC 8335 attempts to solve these limitations by creating the new utility
PROBE which is a specialized ICMP message that makes use of the ICMP
Extension Structure outlined in RFC 4884.

This patchset adds definitions for the ICMP Extended Echo Request and
Reply (PROBE) types for both IPV4 and IPV6, adds a sysctl to enable
responses to PROBE messages, expands the list of supported ICMP messages
to accommodate PROBE types, adds ipv6_dev_find into ipv6_stubs, and adds
functionality to respond to PROBE requests.

Changes:
v1 -> v2:
 - Add AFI definitions
 - Switch to functions such as dev_get_by_name and ip_dev_find to lookup
   net devices

v2 -> v3:
Suggested by Willem de Bruijn <willemdebruijn.kernel@gmail.com>
 - Add verification of incoming messages before looking up netdev
 - Add prefix for PROBE specific defined variables
 - Use proc_dointvec_minmax with zero and one for sysctl
 - Create struct icmp_ext_echo_iio for parsing incoming packets
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
 - Include net/addrconf.h library for ipv6_dev_find

v3 -> v4:
 - Use in_addr instead of __be32 for storing IPV4 addresses
 - Use IFNAMSIZ to statically allocate space for name in
   icmp_ext_echo_iio
Suggested by Willem de Bruijn <willemdebruijn.kernel@gmail.com>
 - Use skb_header_pointer to verify fields in incoming message
 - Add check to ensure that extobj_hdr.length is valid
 - Check to ensure object payload is padded with ASCII NULL characters
   when probing by name, as specified by RFC 8335
 - Statically allocate buff using IFNAMSIZ
 - Add RCU locking around ipv6_dev_find
 - Use __in_dev_get_rcu to access IPV4 addresses of identified
   net_device
 - Remove check for ICMPV6 PROBE types

v4 -> v5:
 - Statically allocate buff to size IFNAMSIZ on declaration
 - Remove goto probe in favor of single branch
 - Remove strict check for incoming PROBE request padding to nearest
   32-bit boundary
Reported-by: kernel test robot <lkp@intel.com>

v5 -> v6:
 - Add documentation for icmp_echo_enable_probe sysctl
 - Remove RCU locking around ipv6_dev_find()
 - Assign iio based on ctype
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents fdb5cc6a d329ea5b
......@@ -1143,6 +1143,12 @@ icmp_echo_ignore_all - BOOLEAN
Default: 0
icmp_echo_enable_probe - BOOLEAN
If set to one, then the kernel will respond to RFC 8335 PROBE
requests sent to it.
Default: 0
icmp_echo_ignore_broadcasts - BOOLEAN
If set non-zero, then the kernel will ignore all ICMP ECHO and
TIMESTAMP requests sent to it via broadcast/multicast.
......
......@@ -66,6 +66,8 @@ struct ipv6_stub {
int (*ipv6_fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *));
struct net_device *(*ipv6_dev_find)(struct net *net, const struct in6_addr *addr,
struct net_device *dev);
};
extern const struct ipv6_stub *ipv6_stub __read_mostly;
......
......@@ -84,6 +84,7 @@ struct netns_ipv4 {
#endif
u8 sysctl_icmp_echo_ignore_all;
u8 sysctl_icmp_echo_enable_probe;
u8 sysctl_icmp_echo_ignore_broadcasts;
u8 sysctl_icmp_ignore_bogus_error_responses;
u8 sysctl_icmp_errors_use_inbound_ifaddr;
......
......@@ -20,6 +20,9 @@
#include <linux/types.h>
#include <asm/byteorder.h>
#include <linux/in.h>
#include <linux/if.h>
#include <linux/in6.h>
#define ICMP_ECHOREPLY 0 /* Echo Reply */
#define ICMP_DEST_UNREACH 3 /* Destination Unreachable */
......@@ -66,6 +69,23 @@
#define ICMP_EXC_TTL 0 /* TTL count exceeded */
#define ICMP_EXC_FRAGTIME 1 /* Fragment Reass time exceeded */
/* Message types for EXT_ECHO (PROBE); compared against / stored in icmphdr.type */
#define ICMP_EXT_ECHO 42
#define ICMP_EXT_ECHOREPLY 43
/* Codes placed in icmphdr.code of an Extended Echo Reply */
#define ICMP_EXT_MAL_QUERY 1 /* Malformed Query */
#define ICMP_EXT_NO_IF 2 /* No such Interface */
#define ICMP_EXT_NO_TABLE_ENT 3 /* No such Table Entry */
#define ICMP_EXT_MULT_IFS 4 /* Multiple Interfaces Satisfy Query */
/* Constants for EXT_ECHO (PROBE) */
/* Status bits OR'ed into the low byte of the reply's sequence field */
#define EXT_ECHOREPLY_ACTIVE (1 << 2)/* active bit in reply message */
#define EXT_ECHOREPLY_IPV4 (1 << 1)/* ipv4 bit in reply message */
#define EXT_ECHOREPLY_IPV6 1 /* ipv6 bit in reply message */
/* Values of icmp_extobj_hdr.class_type: how the probed interface is identified */
#define EXT_ECHO_CTYPE_NAME 1
#define EXT_ECHO_CTYPE_INDEX 2
#define EXT_ECHO_CTYPE_ADDR 3
/* Values of icmp_ext_echo_ctype3_hdr.afi (compared in host byte order) */
#define ICMP_AFI_IP 1 /* Address Family Identifier for ipv4 */
#define ICMP_AFI_IP6 2 /* Address Family Identifier for ipv6 */
struct icmphdr {
__u8 type;
......@@ -118,4 +138,26 @@ struct icmp_extobj_hdr {
__u8 class_type;
};
/* RFC 8335: 2.1 Header for c-type 3 payload
 *
 * Leads the identifier of an Interface Identification Object whose
 * class_type is EXT_ECHO_CTYPE_ADDR; the address itself follows it.
 */
struct icmp_ext_echo_ctype3_hdr {
__be16 afi; /* address family, big-endian: ICMP_AFI_IP or ICMP_AFI_IP6 */
__u8 addrlen; /* size in bytes of the address that follows this header */
__u8 reserved;
};
/* RFC 8335: 2.1 Interface Identification Object
 *
 * An extension object header followed by one identifier; which member of
 * 'ident' is meaningful is selected by extobj_hdr.class_type.
 */
struct icmp_ext_echo_iio {
struct icmp_extobj_hdr extobj_hdr;
union {
/* EXT_ECHO_CTYPE_NAME: interface name, at most IFNAMSIZ bytes */
char name[IFNAMSIZ];
/* EXT_ECHO_CTYPE_INDEX: interface index, big-endian */
__be32 ifindex;
/* EXT_ECHO_CTYPE_ADDR: c-type 3 header, then an IPv4 or IPv6 address */
struct {
struct icmp_ext_echo_ctype3_hdr ctype3_hdr;
union {
struct in_addr ipv4_addr;
struct in6_addr ipv6_addr;
} ip_addr;
} addr;
} ident;
};
#endif /* _UAPI_LINUX_ICMP_H */
......@@ -140,6 +140,9 @@ struct icmp6hdr {
#define ICMPV6_UNK_OPTION 2
#define ICMPV6_HDR_INCOMP 3
/* Message types for EXT_ECHO (PROBE); these are ICMPv6 types, not codes */
#define ICMPV6_EXT_ECHO_REQUEST 160
#define ICMPV6_EXT_ECHO_REPLY 161
/*
* constants for (set|get)sockopt
*/
......
......@@ -971,7 +971,7 @@ static bool icmp_redirect(struct sk_buff *skb)
}
/*
* Handle ICMP_ECHO ("ping") requests.
* Handle ICMP_ECHO ("ping") and ICMP_EXT_ECHO ("PROBE") requests.
*
* RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
* requests.
......@@ -979,27 +979,125 @@ static bool icmp_redirect(struct sk_buff *skb)
* included in the reply.
* RFC 1812: 4.3.3.6 SHOULD have a config option for silently ignoring
* echo requests, MUST have default=NOT.
* RFC 8335: 8 MUST have a config option to enable/disable ICMP
* Extended Echo Functionality, MUST be disabled by default
* See also WRT handling of options once they are done and working.
*/
static bool icmp_echo(struct sk_buff *skb)
{
struct icmp_ext_hdr *ext_hdr, _ext_hdr;
struct icmp_ext_echo_iio *iio, _iio;
struct icmp_bxm icmp_param;
struct net_device *dev;
char buff[IFNAMSIZ];
struct net *net;
u16 ident_len;
u8 status;
net = dev_net(skb_dst(skb)->dev);
if (!net->ipv4.sysctl_icmp_echo_ignore_all) {
struct icmp_bxm icmp_param;
/* should there be an ICMP stat for ignored echos? */
if (net->ipv4.sysctl_icmp_echo_ignore_all)
return true;
icmp_param.data.icmph = *icmp_hdr(skb);
icmp_param.skb = skb;
icmp_param.offset = 0;
icmp_param.data_len = skb->len;
icmp_param.head_len = sizeof(struct icmphdr);
icmp_param.data.icmph = *icmp_hdr(skb);
if (icmp_param.data.icmph.type == ICMP_ECHO) {
icmp_param.data.icmph.type = ICMP_ECHOREPLY;
icmp_param.skb = skb;
icmp_param.offset = 0;
icmp_param.data_len = skb->len;
icmp_param.head_len = sizeof(struct icmphdr);
icmp_reply(&icmp_param, skb);
goto send_reply;
}
/* should there be an ICMP stat for ignored echos? */
return true;
if (!net->ipv4.sysctl_icmp_echo_enable_probe)
return true;
/* We currently only support probing interfaces on the proxy node
* Check to ensure L-bit is set
*/
if (!(ntohs(icmp_param.data.icmph.un.echo.sequence) & 1))
return true;
/* Clear status bits in reply message */
icmp_param.data.icmph.un.echo.sequence &= htons(0xFF00);
icmp_param.data.icmph.type = ICMP_EXT_ECHOREPLY;
ext_hdr = skb_header_pointer(skb, 0, sizeof(_ext_hdr), &_ext_hdr);
/* Size of iio is class_type dependent.
* Only check header here and assign length based on ctype in the switch statement
*/
iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr), &_iio);
if (!ext_hdr || !iio)
goto send_mal_query;
if (ntohs(iio->extobj_hdr.length) <= sizeof(iio->extobj_hdr))
goto send_mal_query;
ident_len = ntohs(iio->extobj_hdr.length) - sizeof(iio->extobj_hdr);
status = 0;
dev = NULL;
switch (iio->extobj_hdr.class_type) {
case EXT_ECHO_CTYPE_NAME:
iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio);
if (ident_len >= IFNAMSIZ)
goto send_mal_query;
memset(buff, 0, sizeof(buff));
memcpy(buff, &iio->ident.name, ident_len);
dev = dev_get_by_name(net, buff);
break;
case EXT_ECHO_CTYPE_INDEX:
iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) +
sizeof(iio->ident.ifindex), &_iio);
if (ident_len != sizeof(iio->ident.ifindex))
goto send_mal_query;
dev = dev_get_by_index(net, ntohl(iio->ident.ifindex));
break;
case EXT_ECHO_CTYPE_ADDR:
if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
iio->ident.addr.ctype3_hdr.addrlen)
goto send_mal_query;
switch (ntohs(iio->ident.addr.ctype3_hdr.afi)) {
case ICMP_AFI_IP:
iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) +
sizeof(struct in_addr), &_iio);
if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
sizeof(struct in_addr))
goto send_mal_query;
dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr.s_addr);
break;
#if IS_ENABLED(CONFIG_IPV6)
case ICMP_AFI_IP6:
iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio);
if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
sizeof(struct in6_addr))
goto send_mal_query;
dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev);
if (dev)
dev_hold(dev);
break;
#endif
default:
goto send_mal_query;
}
break;
default:
goto send_mal_query;
}
if (!dev) {
icmp_param.data.icmph.code = ICMP_EXT_NO_IF;
goto send_reply;
}
/* Fill bits in reply message */
if (dev->flags & IFF_UP)
status |= EXT_ECHOREPLY_ACTIVE;
if (__in_dev_get_rcu(dev) && __in_dev_get_rcu(dev)->ifa_list)
status |= EXT_ECHOREPLY_IPV4;
if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list))
status |= EXT_ECHOREPLY_IPV6;
dev_put(dev);
icmp_param.data.icmph.un.echo.sequence |= htons(status);
send_reply:
icmp_reply(&icmp_param, skb);
return true;
send_mal_query:
icmp_param.data.icmph.code = ICMP_EXT_MAL_QUERY;
goto send_reply;
}
/*
......@@ -1088,6 +1186,16 @@ int icmp_rcv(struct sk_buff *skb)
icmph = icmp_hdr(skb);
ICMPMSGIN_INC_STATS(net, icmph->type);
/* Check for ICMP Extended Echo (PROBE) messages */
if (icmph->type == ICMP_EXT_ECHO) {
/* We can't use icmp_pointers[].handler() because it is an array of
* size NR_ICMP_TYPES + 1 (19 elements) and PROBE has type 42.
*/
success = icmp_echo(skb);
goto success_check;
}
/*
* 18 is the highest 'known' ICMP type. Anything else is a mystery
*
......@@ -1097,7 +1205,6 @@ int icmp_rcv(struct sk_buff *skb)
if (icmph->type > NR_ICMP_TYPES)
goto error;
/*
* Parse the ICMP message
*/
......@@ -1123,7 +1230,7 @@ int icmp_rcv(struct sk_buff *skb)
}
success = icmp_pointers[icmph->type].handler(skb);
success_check:
if (success) {
consume_skb(skb);
return NET_RX_SUCCESS;
......@@ -1340,6 +1447,7 @@ static int __net_init icmp_sk_init(struct net *net)
/* Control parameters for ECHO replies. */
net->ipv4.sysctl_icmp_echo_ignore_all = 0;
net->ipv4.sysctl_icmp_echo_enable_probe = 0;
net->ipv4.sysctl_icmp_echo_ignore_broadcasts = 1;
/* Control parameter - ignore bogus broadcast responses? */
......
......@@ -453,7 +453,9 @@ EXPORT_SYMBOL_GPL(ping_bind);
/*
 * Return non-zero when (@family, @type, @code) names a supported request:
 * an echo or extended echo (PROBE) request, with code 0, for AF_INET or
 * AF_INET6.  Return 0 for every other combination.
 */
static inline int ping_supported(int family, int type, int code)
{
	return (family == AF_INET && type == ICMP_ECHO && code == 0) ||
	       (family == AF_INET && type == ICMP_EXT_ECHO && code == 0) ||
	       (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0) ||
	       (family == AF_INET6 && type == ICMPV6_EXT_ECHO_REQUEST && code == 0);
}
/*
......
......@@ -598,6 +598,15 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
},
{
/* The backing field is a u8, so size and handler must be the u8
 * variants; an int handler would store past the one-byte field.
 */
.procname = "icmp_echo_enable_probe",
.data = &init_net.ipv4.sysctl_icmp_echo_enable_probe,
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE
},
{
.procname = "icmp_echo_ignore_broadcasts",
.data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
......
......@@ -198,6 +198,12 @@ static int eafnosupport_ipv6_fragment(struct net *net, struct sock *sk, struct s
return -EAFNOSUPPORT;
}
/*
 * Default ipv6_dev_find entry of the fallback ipv6_stub: performs no lookup,
 * ignores all arguments and reports -EAFNOSUPPORT encoded with ERR_PTR().
 * NOTE(review): the result is an encoded error rather than NULL, so a plain
 * NULL test at a call site presumably will not catch it; confirm callers.
 */
static struct net_device *eafnosupport_ipv6_dev_find(struct net *net, const struct in6_addr *addr,
struct net_device *dev)
{
return ERR_PTR(-EAFNOSUPPORT);
}
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.ipv6_dst_lookup_flow = eafnosupport_ipv6_dst_lookup_flow,
.ipv6_route_input = eafnosupport_ipv6_route_input,
......@@ -209,6 +215,7 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.fib6_nh_init = eafnosupport_fib6_nh_init,
.ip6_del_rt = eafnosupport_ip6_del_rt,
.ipv6_fragment = eafnosupport_ipv6_fragment,
.ipv6_dev_find = eafnosupport_ipv6_dev_find,
};
EXPORT_SYMBOL_GPL(ipv6_stub);
......
......@@ -1032,6 +1032,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
#endif
.nd_tbl = &nd_tbl,
.ipv6_fragment = ip6_fragment,
.ipv6_dev_find = ipv6_dev_find,
};
static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment