Commit 15769ff8 authored by David S. Miller's avatar David S. Miller

Merge branch 'VSOCK-add-vsockmon'

Stefan Hajnoczi says:

====================
VSOCK: vsockmon virtual device to monitor AF_VSOCK sockets.

v5:
 * Change vsock_deliver_tap() API to avoid unnecessary skb creation
   [Jorgen]
 * Fix skb leak when no taps are registered [Jorgen]
 * s/cpu_to_le16(pkt->hdr.op)/le16_to_cpu(pkt->hdr.op)/ [Michael]
 * Add af_vsock_tap.c and vsockmon.[ch] to MAINTAINERS
 * checkpatch.pl and sparse fixes

v4:
 * Add explicit reserved padding field to struct af_vsockmon_hdr and
   drop __attribute__((packed)) [Michael, DaveM]
 * Call synchronize_net() before module_put() [Michael]

v3:
 * Hook virtio_transport.c (guest driver), not just drivers/vhost/vsock.c (host
   driver)
 * Fix DEFAULT_MTU macro definition [Zhu Yanjun]
 * Rename af_vsockmon_hdr->t field ->transport for clarity
 * Update .ndo_get_stats64() return type since it has changed
 * Include missing <linux/module.h> header in af_vsock_tap.c

This is a continuation of Gerard Garcia's work on the vsockmon packet capture
interface for AF_VSOCK.  Packet capture is an essential feature for network
communication.  Gerard began addressing this feature gap in his Google Summer
of Code 2016 project.  I have cleaned up, rebased, and retested the v2 series
he posted previously.

The design follows the nlmon packet capture interface closely.  This is because
vsock has the same problem as netlink: there is no netdev on which packets can
be captured.  The nlmon driver is a synthetic netdev purely for the purpose of
enabling packet capture.  We follow the same approach here with vsockmon.

See include/uapi/linux/vsockmon.h in this series for details on the packet
layout.

How to try it:

1. Build tcpdump with vsockmon patches:

  $ git clone -b vsock https://github.com/stefanha/libpcap
  $ (cd libcap && ./configure && make)
  $ git clone -b vsock https://github.com/stefanha/tcpdump
  $ (cd tcpdump && ./configure && make)

2. Build nc-vsock (a netcat-like tool):

  $ git clone https://github.com/stefanha/nc-vsock
  $ (cd nc-vsock && make)

3. Launch a virtual machine:

  # modprobe vhost_vsock
  # qemu-system-x86_64 -M accel=kvm -m 1024 -cpu host \
      -drive if=virtio,file=test.img,format=raw \
      -device vhost-vsock-pci,guest-cid=3

  (Assumes guest is running a kernel with this patch)

4. Capture AF_VSOCK traffic in guest and/or host:

  # modprobe vsockmon
  # ip link add type vsockmon
  # ip link set vsockmon0 up
  # tcpdump -i vsockmon0 -vvv

5. Communicate!

  (host)$ nc-vsock -l 1234
  (guest)$ nc-vsock 2 1234
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents ac2291ce 82dfb540
......@@ -13317,8 +13317,11 @@ L: netdev@vger.kernel.org
S: Maintained
F: include/linux/virtio_vsock.h
F: include/uapi/linux/virtio_vsock.h
F: include/uapi/linux/vsockmon.h
F: net/vmw_vsock/af_vsock_tap.c
F: net/vmw_vsock/virtio_transport_common.c
F: net/vmw_vsock/virtio_transport.c
F: drivers/net/vsockmon.c
F: drivers/vhost/vsock.c
F: drivers/vhost/vsock.h
......
......@@ -355,6 +355,14 @@ config NET_VRF
This option enables the support for mapping interfaces into VRF's. The
support enables VRF devices.
config VSOCKMON
tristate "Virtual vsock monitoring device"
depends on VHOST_VSOCK
---help---
This option enables a monitoring net device for vsock sockets. It is
mostly intended for developers or support to debug vsock issues. If
unsure, say N.
endif # NET_CORE
config SUNGEM_PHY
......
......@@ -30,6 +30,7 @@ obj-$(CONFIG_GENEVE) += geneve.o
obj-$(CONFIG_GTP) += gtp.o
obj-$(CONFIG_NLMON) += nlmon.o
obj-$(CONFIG_NET_VRF) += vrf.o
obj-$(CONFIG_VSOCKMON) += vsockmon.o
#
# Networking Drivers
......
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/if_arp.h>
#include <net/rtnetlink.h>
#include <net/sock.h>
#include <net/af_vsock.h>
#include <uapi/linux/vsockmon.h>
#include <linux/virtio_vsock.h>
/* Virtio transport max packet size plus header */
#define DEFAULT_MTU (VIRTIO_VSOCK_MAX_PKT_BUF_SIZE + \
sizeof(struct af_vsockmon_hdr))
struct pcpu_lstats {
u64 rx_packets;
u64 rx_bytes;
struct u64_stats_sync syncp;
};
static int vsockmon_dev_init(struct net_device *dev)
{
dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
if (!dev->lstats)
return -ENOMEM;
return 0;
}
static void vsockmon_dev_uninit(struct net_device *dev)
{
free_percpu(dev->lstats);
}
struct vsockmon {
struct vsock_tap vt;
};
static int vsockmon_open(struct net_device *dev)
{
struct vsockmon *vsockmon = netdev_priv(dev);
vsockmon->vt.dev = dev;
vsockmon->vt.module = THIS_MODULE;
return vsock_add_tap(&vsockmon->vt);
}
static int vsockmon_close(struct net_device *dev)
{
struct vsockmon *vsockmon = netdev_priv(dev);
return vsock_remove_tap(&vsockmon->vt);
}
static netdev_tx_t vsockmon_xmit(struct sk_buff *skb, struct net_device *dev)
{
int len = skb->len;
struct pcpu_lstats *stats = this_cpu_ptr(dev->lstats);
u64_stats_update_begin(&stats->syncp);
stats->rx_bytes += len;
stats->rx_packets++;
u64_stats_update_end(&stats->syncp);
dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
static void
vsockmon_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
int i;
u64 bytes = 0, packets = 0;
for_each_possible_cpu(i) {
const struct pcpu_lstats *vstats;
u64 tbytes, tpackets;
unsigned int start;
vstats = per_cpu_ptr(dev->lstats, i);
do {
start = u64_stats_fetch_begin_irq(&vstats->syncp);
tbytes = vstats->rx_bytes;
tpackets = vstats->rx_packets;
} while (u64_stats_fetch_retry_irq(&vstats->syncp, start));
packets += tpackets;
bytes += tbytes;
}
stats->rx_packets = packets;
stats->tx_packets = 0;
stats->rx_bytes = bytes;
stats->tx_bytes = 0;
}
static int vsockmon_is_valid_mtu(int new_mtu)
{
return new_mtu >= (int)sizeof(struct af_vsockmon_hdr);
}
static int vsockmon_change_mtu(struct net_device *dev, int new_mtu)
{
if (!vsockmon_is_valid_mtu(new_mtu))
return -EINVAL;
dev->mtu = new_mtu;
return 0;
}
static const struct net_device_ops vsockmon_ops = {
.ndo_init = vsockmon_dev_init,
.ndo_uninit = vsockmon_dev_uninit,
.ndo_open = vsockmon_open,
.ndo_stop = vsockmon_close,
.ndo_start_xmit = vsockmon_xmit,
.ndo_get_stats64 = vsockmon_get_stats64,
.ndo_change_mtu = vsockmon_change_mtu,
};
static u32 always_on(struct net_device *dev)
{
return 1;
}
static const struct ethtool_ops vsockmon_ethtool_ops = {
.get_link = always_on,
};
static void vsockmon_setup(struct net_device *dev)
{
dev->type = ARPHRD_VSOCKMON;
dev->priv_flags |= IFF_NO_QUEUE;
dev->netdev_ops = &vsockmon_ops;
dev->ethtool_ops = &vsockmon_ethtool_ops;
dev->destructor = free_netdev;
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
NETIF_F_HIGHDMA | NETIF_F_LLTX;
dev->flags = IFF_NOARP;
dev->mtu = DEFAULT_MTU;
}
static struct rtnl_link_ops vsockmon_link_ops __read_mostly = {
.kind = "vsockmon",
.priv_size = sizeof(struct vsockmon),
.setup = vsockmon_setup,
};
static __init int vsockmon_register(void)
{
return rtnl_link_register(&vsockmon_link_ops);
}
static __exit void vsockmon_unregister(void)
{
rtnl_link_unregister(&vsockmon_link_ops);
}
module_init(vsockmon_register);
module_exit(vsockmon_unregister);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Gerard Garcia <ggarcia@deic.uab.cat>");
MODULE_DESCRIPTION("Vsock monitoring device. Based on nlmon device.");
MODULE_ALIAS_RTNL_LINK("vsockmon");
......@@ -176,6 +176,11 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
restart_tx = true;
}
/* Deliver to monitoring devices all correctly transmitted
* packets.
*/
virtio_transport_deliver_tap_pkt(pkt);
virtio_transport_free_pkt(pkt);
}
if (added)
......@@ -383,6 +388,9 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
len = pkt->len;
/* Deliver to monitoring devices all received packets */
virtio_transport_deliver_tap_pkt(pkt);
/* Only accept correctly addressed packets */
if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
virtio_transport_recv_pkt(pkt);
......
......@@ -153,5 +153,6 @@ void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt);
void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt);
u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted);
void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit);
void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt);
#endif /* _LINUX_VIRTIO_VSOCK_H */
......@@ -188,4 +188,17 @@ struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
void vsock_remove_sock(struct vsock_sock *vsk);
void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
/**** TAP ****/
struct vsock_tap {
struct net_device *dev;
struct module *module;
struct list_head list;
};
int vsock_init_tap(void);
int vsock_add_tap(struct vsock_tap *vt);
int vsock_remove_tap(struct vsock_tap *vt);
void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque);
#endif /* __AF_VSOCK_H__ */
......@@ -477,6 +477,7 @@ header-y += virtio_types.h
header-y += virtio_vsock.h
header-y += virtio_crypto.h
header-y += vm_sockets.h
header-y += vsockmon.h
header-y += vt.h
header-y += vtpm_proxy.h
header-y += wait.h
......
......@@ -95,6 +95,7 @@
#define ARPHRD_IP6GRE 823 /* GRE over IPv6 */
#define ARPHRD_NETLINK 824 /* Netlink header */
#define ARPHRD_6LOWPAN 825 /* IPv6 over LoWPAN */
#define ARPHRD_VSOCKMON 826 /* Vsock monitor header */
#define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */
#define ARPHRD_NONE 0xFFFE /* zero header length */
......
#ifndef _UAPI_VSOCKMON_H
#define _UAPI_VSOCKMON_H
#include <linux/virtio_vsock.h>
/*
* vsockmon is the AF_VSOCK packet capture device. Packets captured have the
* following layout:
*
* +-----------------------------------+
* | vsockmon header |
* | (struct af_vsockmon_hdr) |
* +-----------------------------------+
* | transport header |
* | (af_vsockmon_hdr->len bytes long) |
* +-----------------------------------+
* | payload |
* | (until end of packet) |
* +-----------------------------------+
*
* The vsockmon header is a transport-independent description of the packet.
* It duplicates some of the information from the transport header so that
* no transport-specific knowledge is necessary to process packets.
*
* The transport header is useful for low-level transport-specific packet
* analysis. Transport type is given in af_vsockmon_hdr->transport and
* transport header length is given in af_vsockmon_hdr->len.
*
* If af_vsockmon_hdr->op is AF_VSOCK_OP_PAYLOAD then the payload follows the
* transport header. Other ops do not have a payload.
*/
struct af_vsockmon_hdr {
__le64 src_cid;
__le64 dst_cid;
__le32 src_port;
__le32 dst_port;
__le16 op; /* enum af_vsockmon_op */
__le16 transport; /* enum af_vsockmon_transport */
__le16 len; /* Transport header length */
__u8 reserved[2];
};
enum af_vsockmon_op {
AF_VSOCK_OP_UNKNOWN = 0,
AF_VSOCK_OP_CONNECT = 1,
AF_VSOCK_OP_DISCONNECT = 2,
AF_VSOCK_OP_CONTROL = 3,
AF_VSOCK_OP_PAYLOAD = 4,
};
enum af_vsockmon_transport {
AF_VSOCK_TRANSPORT_UNKNOWN = 0,
AF_VSOCK_TRANSPORT_NO_INFO = 1, /* No transport information */
/* Transport header type: struct virtio_vsock_hdr */
AF_VSOCK_TRANSPORT_VIRTIO = 2,
};
#endif
......@@ -3,7 +3,7 @@ obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o
vsock-y += af_vsock.o vsock_addr.o
vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o
vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \
vmci_transport_notify_qstate.o
......
/*
* Tap functions for AF_VSOCK sockets.
*
* Code based on net/netlink/af_netlink.c tap functions.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <net/sock.h>
#include <net/af_vsock.h>
#include <linux/if_arp.h>
static DEFINE_SPINLOCK(vsock_tap_lock);
static struct list_head vsock_tap_all __read_mostly =
LIST_HEAD_INIT(vsock_tap_all);
int vsock_add_tap(struct vsock_tap *vt)
{
if (unlikely(vt->dev->type != ARPHRD_VSOCKMON))
return -EINVAL;
__module_get(vt->module);
spin_lock(&vsock_tap_lock);
list_add_rcu(&vt->list, &vsock_tap_all);
spin_unlock(&vsock_tap_lock);
return 0;
}
EXPORT_SYMBOL_GPL(vsock_add_tap);
int vsock_remove_tap(struct vsock_tap *vt)
{
struct vsock_tap *tmp;
bool found = false;
spin_lock(&vsock_tap_lock);
list_for_each_entry(tmp, &vsock_tap_all, list) {
if (vt == tmp) {
list_del_rcu(&vt->list);
found = true;
goto out;
}
}
pr_warn("vsock_remove_tap: %p not found\n", vt);
out:
spin_unlock(&vsock_tap_lock);
synchronize_net();
if (found)
module_put(vt->module);
return found ? 0 : -ENODEV;
}
EXPORT_SYMBOL_GPL(vsock_remove_tap);
static int __vsock_deliver_tap_skb(struct sk_buff *skb,
struct net_device *dev)
{
int ret = 0;
struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
if (nskb) {
dev_hold(dev);
nskb->dev = dev;
ret = dev_queue_xmit(nskb);
if (unlikely(ret > 0))
ret = net_xmit_errno(ret);
dev_put(dev);
}
return ret;
}
static void __vsock_deliver_tap(struct sk_buff *skb)
{
int ret;
struct vsock_tap *tmp;
list_for_each_entry_rcu(tmp, &vsock_tap_all, list) {
ret = __vsock_deliver_tap_skb(skb, tmp->dev);
if (unlikely(ret))
break;
}
}
void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque)
{
struct sk_buff *skb;
rcu_read_lock();
if (likely(list_empty(&vsock_tap_all)))
goto out;
skb = build_skb(opaque);
if (skb) {
__vsock_deliver_tap(skb);
consume_skb(skb);
}
out:
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(vsock_deliver_tap);
......@@ -144,6 +144,8 @@ virtio_transport_send_pkt_work(struct work_struct *work)
list_del_init(&pkt->list);
spin_unlock_bh(&vsock->send_pkt_list_lock);
virtio_transport_deliver_tap_pkt(pkt);
reply = pkt->reply;
sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
......@@ -370,6 +372,7 @@ static void virtio_transport_rx_work(struct work_struct *work)
}
pkt->len = len - sizeof(pkt->hdr);
virtio_transport_deliver_tap_pkt(pkt);
virtio_transport_recv_pkt(pkt);
}
} while (!virtqueue_enable_cb(vq));
......
......@@ -16,6 +16,7 @@
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>
#include <linux/virtio_vsock.h>
#include <uapi/linux/vsockmon.h>
#include <net/sock.h>
#include <net/af_vsock.h>
......@@ -85,6 +86,69 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
return NULL;
}
/* Packet capture */
static struct sk_buff *virtio_transport_build_skb(void *opaque)
{
struct virtio_vsock_pkt *pkt = opaque;
unsigned char *t_hdr, *payload;
struct af_vsockmon_hdr *hdr;
struct sk_buff *skb;
skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + pkt->len,
GFP_ATOMIC);
if (!skb)
return NULL;
hdr = (struct af_vsockmon_hdr *)skb_put(skb, sizeof(*hdr));
/* pkt->hdr is little-endian so no need to byteswap here */
hdr->src_cid = pkt->hdr.src_cid;
hdr->src_port = pkt->hdr.src_port;
hdr->dst_cid = pkt->hdr.dst_cid;
hdr->dst_port = pkt->hdr.dst_port;
hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO);
hdr->len = cpu_to_le16(sizeof(pkt->hdr));
memset(hdr->reserved, 0, sizeof(hdr->reserved));
switch (le16_to_cpu(pkt->hdr.op)) {
case VIRTIO_VSOCK_OP_REQUEST:
case VIRTIO_VSOCK_OP_RESPONSE:
hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT);
break;
case VIRTIO_VSOCK_OP_RST:
case VIRTIO_VSOCK_OP_SHUTDOWN:
hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT);
break;
case VIRTIO_VSOCK_OP_RW:
hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD);
break;
case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL);
break;
default:
hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN);
break;
}
t_hdr = skb_put(skb, sizeof(pkt->hdr));
memcpy(t_hdr, &pkt->hdr, sizeof(pkt->hdr));
if (pkt->len) {
payload = skb_put(skb, pkt->len);
memcpy(payload, pkt->buf, pkt->len);
}
return skb;
}
void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt)
{
vsock_deliver_tap(virtio_transport_build_skb, pkt);
}
EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
struct virtio_vsock_pkt_info *info)
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment