Commit b9fcf0a0 authored by David S. Miller

Merge branch 'support-AF_PACKET-for-layer-3-devices'

Jason A. Donenfeld says:

====================
support AF_PACKET for layer 3 devices

Hans reported that packets injected by a correct-looking and trivial
libpcap-based program were not being accepted by wireguard. In
investigating that, I noticed that a few devices weren't properly
handling AF_PACKET-injected packets, and so this series introduces a bit
of shared infrastructure to support that.

The basic problem begins with socket(AF_PACKET, SOCK_RAW,
htons(ETH_P_ALL)) sockets. When sendto is called, AF_PACKET examines the
headers of the packet with this logic:

static void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
{
    if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) &&
        sock->type == SOCK_RAW) {
        skb_reset_mac_header(skb);
        skb->protocol = dev_parse_header_protocol(skb);
    }

    skb_probe_transport_header(skb);
}

The middle condition there triggers, and we jump to
dev_parse_header_protocol. Note that this is the only caller of
dev_parse_header_protocol in the kernel, and I assume it was designed
for this purpose:

static inline __be16 dev_parse_header_protocol(const struct sk_buff *skb)
{
    const struct net_device *dev = skb->dev;

    if (!dev->header_ops || !dev->header_ops->parse_protocol)
        return 0;
    return dev->header_ops->parse_protocol(skb);
}

Since AF_PACKET already knows which netdev the packet is going to, the
dev_parse_header_protocol function can see if that netdev has a way it
prefers to figure out the protocol from the header. This, again, is the
only use of parse_protocol in the kernel. At the moment, it's only used
with ethernet devices, via eth_header_parse_protocol. This makes sense,
as mostly people are used to AF_PACKET-injecting ethernet frames rather
than layer 3 frames. But with nothing in place for layer 3 netdevs, this
function winds up returning 0, and skb->protocol then is set to 0, and
then by the time it hits the netdev's ndo_start_xmit, the driver doesn't
know what to do with it.

This is a problem because drivers very much rely on skb->protocol being
correct, and routinely reject packets where it's incorrect. That's why
having this parsing happen for injected packets is quite important. In
wireguard, ipip, and ipip6, for example, packets from AF_PACKET are just
dropped entirely. For tun devices, it's sort of uglier, with the tun
"packet information" header being passed to userspace containing a bogus
protocol value. Some userspace programs are ill-equipped to deal with
that. (But of course, that doesn't happen with tap devices, which
benefit from the similar shared infrastructure for layer 2 netdevs,
further motivating this patchset for layer 3 netdevs.)

This patchset addresses the issue by first adding a layer 3 header parse
function, much akin to the existing one for layer 2 packets, and then
adding a shared header_ops structure, also much akin to the existing
one for layer 2 packets. Then it wires these up to a few immediate places
that stuck out as requiring them, and does a bit of cleanup.

This patchset seems like it's fixing real bugs, so it might be
appropriate for stable. But they're also very old bugs, so if you'd
rather not backport to stable, that'd make sense to me too.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents bf64ff4c 8f9a1fa4
...@@ -62,6 +62,7 @@ ...@@ -62,6 +62,7 @@
#include <net/rtnetlink.h> #include <net/rtnetlink.h>
#include <net/sock.h> #include <net/sock.h>
#include <net/xdp.h> #include <net/xdp.h>
#include <net/ip_tunnels.h>
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/skb_array.h> #include <linux/skb_array.h>
...@@ -1351,6 +1352,7 @@ static void tun_net_init(struct net_device *dev) ...@@ -1351,6 +1352,7 @@ static void tun_net_init(struct net_device *dev)
switch (tun->flags & TUN_TYPE_MASK) { switch (tun->flags & TUN_TYPE_MASK) {
case IFF_TUN: case IFF_TUN:
dev->netdev_ops = &tun_netdev_ops; dev->netdev_ops = &tun_netdev_ops;
dev->header_ops = &ip_tunnel_header_ops;
/* Point-to-Point TUN Device */ /* Point-to-Point TUN Device */
dev->hard_header_len = 0; dev->hard_header_len = 0;
......
...@@ -262,6 +262,7 @@ static void wg_setup(struct net_device *dev) ...@@ -262,6 +262,7 @@ static void wg_setup(struct net_device *dev)
max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
dev->netdev_ops = &netdev_ops; dev->netdev_ops = &netdev_ops;
dev->header_ops = &ip_tunnel_header_ops;
dev->hard_header_len = 0; dev->hard_header_len = 0;
dev->addr_len = 0; dev->addr_len = 0;
dev->needed_headroom = DATA_PACKET_HEAD_ROOM; dev->needed_headroom = DATA_PACKET_HEAD_ROOM;
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/ip.h> #include <linux/ip.h>
#include <linux/ipv6.h> #include <linux/ipv6.h>
#include <net/ip_tunnels.h>
struct wg_device; struct wg_device;
struct wg_peer; struct wg_peer;
...@@ -65,25 +66,9 @@ struct packet_cb { ...@@ -65,25 +66,9 @@ struct packet_cb {
#define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb)) #define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb))
#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer) #define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer)
/* Returns either the correct skb->protocol value, or 0 if invalid. */
static inline __be16 wg_examine_packet_protocol(struct sk_buff *skb)
{
if (skb_network_header(skb) >= skb->head &&
(skb_network_header(skb) + sizeof(struct iphdr)) <=
skb_tail_pointer(skb) &&
ip_hdr(skb)->version == 4)
return htons(ETH_P_IP);
if (skb_network_header(skb) >= skb->head &&
(skb_network_header(skb) + sizeof(struct ipv6hdr)) <=
skb_tail_pointer(skb) &&
ipv6_hdr(skb)->version == 6)
return htons(ETH_P_IPV6);
return 0;
}
static inline bool wg_check_packet_protocol(struct sk_buff *skb) static inline bool wg_check_packet_protocol(struct sk_buff *skb)
{ {
__be16 real_protocol = wg_examine_packet_protocol(skb); __be16 real_protocol = ip_tunnel_parse_protocol(skb);
return real_protocol && skb->protocol == real_protocol; return real_protocol && skb->protocol == real_protocol;
} }
......
...@@ -387,7 +387,7 @@ static void wg_packet_consume_data_done(struct wg_peer *peer, ...@@ -387,7 +387,7 @@ static void wg_packet_consume_data_done(struct wg_peer *peer,
*/ */
skb->ip_summed = CHECKSUM_UNNECESSARY; skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->csum_level = ~0; /* All levels */ skb->csum_level = ~0; /* All levels */
skb->protocol = wg_examine_packet_protocol(skb); skb->protocol = ip_tunnel_parse_protocol(skb);
if (skb->protocol == htons(ETH_P_IP)) { if (skb->protocol == htons(ETH_P_IP)) {
len = ntohs(ip_hdr(skb)->tot_len); len = ntohs(ip_hdr(skb)->tot_len);
if (unlikely(len < sizeof(struct iphdr))) if (unlikely(len < sizeof(struct iphdr)))
......
...@@ -290,6 +290,9 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], ...@@ -290,6 +290,9 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p, __u32 fwmark); struct ip_tunnel_parm *p, __u32 fwmark);
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); void ip_tunnel_setup(struct net_device *dev, unsigned int net_id);
extern const struct header_ops ip_tunnel_header_ops;
__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb);
struct ip_tunnel_encap_ops { struct ip_tunnel_encap_ops {
size_t (*encap_hlen)(struct ip_tunnel_encap *e); size_t (*encap_hlen)(struct ip_tunnel_encap *e);
int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
......
...@@ -844,3 +844,21 @@ void ip_tunnel_unneed_metadata(void) ...@@ -844,3 +844,21 @@ void ip_tunnel_unneed_metadata(void)
static_branch_dec(&ip_tunnel_metadata_cnt); static_branch_dec(&ip_tunnel_metadata_cnt);
} }
EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata); EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata);
/* Returns either the correct skb->protocol value, or 0 if invalid. */
__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb)
{
if (skb_network_header(skb) >= skb->head &&
(skb_network_header(skb) + sizeof(struct iphdr)) <= skb_tail_pointer(skb) &&
ip_hdr(skb)->version == 4)
return htons(ETH_P_IP);
if (skb_network_header(skb) >= skb->head &&
(skb_network_header(skb) + sizeof(struct ipv6hdr)) <= skb_tail_pointer(skb) &&
ipv6_hdr(skb)->version == 6)
return htons(ETH_P_IPV6);
return 0;
}
EXPORT_SYMBOL(ip_tunnel_parse_protocol);
const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tunnel_parse_protocol };
EXPORT_SYMBOL(ip_tunnel_header_ops);
...@@ -441,6 +441,7 @@ static const struct net_device_ops vti_netdev_ops = { ...@@ -441,6 +441,7 @@ static const struct net_device_ops vti_netdev_ops = {
static void vti_tunnel_setup(struct net_device *dev) static void vti_tunnel_setup(struct net_device *dev)
{ {
dev->netdev_ops = &vti_netdev_ops; dev->netdev_ops = &vti_netdev_ops;
dev->header_ops = &ip_tunnel_header_ops;
dev->type = ARPHRD_TUNNEL; dev->type = ARPHRD_TUNNEL;
ip_tunnel_setup(dev, vti_net_id); ip_tunnel_setup(dev, vti_net_id);
} }
......
...@@ -361,6 +361,7 @@ static const struct net_device_ops ipip_netdev_ops = { ...@@ -361,6 +361,7 @@ static const struct net_device_ops ipip_netdev_ops = {
static void ipip_tunnel_setup(struct net_device *dev) static void ipip_tunnel_setup(struct net_device *dev)
{ {
dev->netdev_ops = &ipip_netdev_ops; dev->netdev_ops = &ipip_netdev_ops;
dev->header_ops = &ip_tunnel_header_ops;
dev->type = ARPHRD_TUNNEL; dev->type = ARPHRD_TUNNEL;
dev->flags = IFF_NOARP; dev->flags = IFF_NOARP;
......
...@@ -1846,6 +1846,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { ...@@ -1846,6 +1846,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
static void ip6_tnl_dev_setup(struct net_device *dev) static void ip6_tnl_dev_setup(struct net_device *dev)
{ {
dev->netdev_ops = &ip6_tnl_netdev_ops; dev->netdev_ops = &ip6_tnl_netdev_ops;
dev->header_ops = &ip_tunnel_header_ops;
dev->needs_free_netdev = true; dev->needs_free_netdev = true;
dev->priv_destructor = ip6_dev_free; dev->priv_destructor = ip6_dev_free;
......
...@@ -905,6 +905,7 @@ static const struct net_device_ops vti6_netdev_ops = { ...@@ -905,6 +905,7 @@ static const struct net_device_ops vti6_netdev_ops = {
static void vti6_dev_setup(struct net_device *dev) static void vti6_dev_setup(struct net_device *dev)
{ {
dev->netdev_ops = &vti6_netdev_ops; dev->netdev_ops = &vti6_netdev_ops;
dev->header_ops = &ip_tunnel_header_ops;
dev->needs_free_netdev = true; dev->needs_free_netdev = true;
dev->priv_destructor = vti6_dev_free; dev->priv_destructor = vti6_dev_free;
......
...@@ -1421,6 +1421,7 @@ static void ipip6_tunnel_setup(struct net_device *dev) ...@@ -1421,6 +1421,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
int t_hlen = tunnel->hlen + sizeof(struct iphdr); int t_hlen = tunnel->hlen + sizeof(struct iphdr);
dev->netdev_ops = &ipip6_netdev_ops; dev->netdev_ops = &ipip6_netdev_ops;
dev->header_ops = &ip_tunnel_header_ops;
dev->needs_free_netdev = true; dev->needs_free_netdev = true;
dev->priv_destructor = ipip6_dev_free; dev->priv_destructor = ipip6_dev_free;
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include <net/ip.h> #include <net/ip.h>
#include <net/ipv6.h> #include <net/ipv6.h>
#include <net/ip6_route.h> #include <net/ip6_route.h>
#include <net/ip_tunnels.h>
#include <net/addrconf.h> #include <net/addrconf.h>
#include <net/xfrm.h> #include <net/xfrm.h>
#include <net/net_namespace.h> #include <net/net_namespace.h>
...@@ -581,6 +582,7 @@ static const struct net_device_ops xfrmi_netdev_ops = { ...@@ -581,6 +582,7 @@ static const struct net_device_ops xfrmi_netdev_ops = {
static void xfrmi_dev_setup(struct net_device *dev) static void xfrmi_dev_setup(struct net_device *dev)
{ {
dev->netdev_ops = &xfrmi_netdev_ops; dev->netdev_ops = &xfrmi_netdev_ops;
dev->header_ops = &ip_tunnel_header_ops;
dev->type = ARPHRD_NONE; dev->type = ARPHRD_NONE;
dev->mtu = ETH_DATA_LEN; dev->mtu = ETH_DATA_LEN;
dev->min_mtu = ETH_MIN_MTU; dev->min_mtu = ETH_MIN_MTU;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment