Commit 5d38f324, authored by Erel Geron, committed by Richard Weinberger

um: drivers: Add virtio vhost-user driver

This module allows virtio devices to be used over a vhost-user socket.
Signed-off-by: Erel Geron <erelx.geron@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
parent 851b6cb1
...@@ -335,3 +335,10 @@ config UML_NET_SLIRP ...@@ -335,3 +335,10 @@ config UML_NET_SLIRP
Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp" Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
endmenu endmenu
config VIRTIO_UML
tristate "UML driver for virtio devices"
select VIRTIO
help
This driver provides support for virtio based paravirtual device
drivers over vhost-user sockets.
...@@ -61,6 +61,7 @@ obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o ...@@ -61,6 +61,7 @@ obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o
obj-$(CONFIG_UML_WATCHDOG) += harddog.o obj-$(CONFIG_UML_WATCHDOG) += harddog.o
obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
obj-$(CONFIG_UML_RANDOM) += random.o obj-$(CONFIG_UML_RANDOM) += random.o
obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
# pcap_user.o must be added explicitly. # pcap_user.o must be added explicitly.
USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
......
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Vhost-user protocol */
#ifndef __VHOST_USER_H__
#define __VHOST_USER_H__
/* Message flags: the driver expects this bit (plus the version) in header.flags of replies */
#define VHOST_USER_FLAG_REPLY BIT(2)
/* Feature bits: bit number within the 64-bit feature word */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
/* Protocol feature bits: bit number within the 64-bit protocol feature word */
#define VHOST_USER_PROTOCOL_F_CONFIG 9
/* Vring state index masks, applied to the u64 payload of the vring fd messages */
#define VHOST_USER_VRING_INDEX_MASK 0xff
#define VHOST_USER_VRING_POLL_MASK BIT(8)
/* Supported version: OR-ed into header.flags of every message that is sent */
#define VHOST_USER_VERSION 1
/* Supported transport features */
#define VHOST_USER_SUPPORTED_F BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)
/* Supported protocol features: mask applied to what the other side offers */
#define VHOST_USER_SUPPORTED_PROTOCOL_F BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)
/*
 * Request codes carried in vhost_user_header.request. The values are
 * assigned explicitly because they go onto the wire as-is.
 */
enum vhost_user_request {
VHOST_USER_GET_FEATURES = 1,
VHOST_USER_SET_FEATURES = 2,
VHOST_USER_SET_OWNER = 3,
VHOST_USER_RESET_OWNER = 4,
VHOST_USER_SET_MEM_TABLE = 5,
VHOST_USER_SET_LOG_BASE = 6,
VHOST_USER_SET_LOG_FD = 7,
VHOST_USER_SET_VRING_NUM = 8,
VHOST_USER_SET_VRING_ADDR = 9,
VHOST_USER_SET_VRING_BASE = 10,
VHOST_USER_GET_VRING_BASE = 11,
VHOST_USER_SET_VRING_KICK = 12,
VHOST_USER_SET_VRING_CALL = 13,
VHOST_USER_SET_VRING_ERR = 14,
VHOST_USER_GET_PROTOCOL_FEATURES = 15,
VHOST_USER_SET_PROTOCOL_FEATURES = 16,
VHOST_USER_GET_QUEUE_NUM = 17,
VHOST_USER_SET_VRING_ENABLE = 18,
VHOST_USER_SEND_RARP = 19,
VHOST_USER_NET_SEND_MTU = 20,
VHOST_USER_SET_SLAVE_REQ_FD = 21,
VHOST_USER_IOTLB_MSG = 22,
VHOST_USER_SET_VRING_ENDIAN = 23,
VHOST_USER_GET_CONFIG = 24,
VHOST_USER_SET_CONFIG = 25,
};
/* Fixed-size header that starts every vhost-user message (packed, 3 x u32) */
struct vhost_user_header {
u32 request; /* Use enum vhost_user_request */
/* VHOST_USER_VERSION, plus VHOST_USER_FLAG_REPLY on replies */
u32 flags;
/* Number of payload bytes that follow the header */
u32 size;
} __packed;
/*
 * Payload of VHOST_USER_GET_CONFIG / VHOST_USER_SET_CONFIG: a fixed part
 * describing the accessed window, followed by the config bytes themselves.
 * The trailing data is declared as a C99 flexible array member rather than
 * a zero-length array, so sizeof() still covers only the fixed part while
 * the compiler can reason about the trailing storage correctly.
 */
struct vhost_user_config {
	u32 offset;	/* start of the window inside the config space */
	u32 size;	/* number of bytes in payload[] */
	u32 flags;
	u8 payload[];	/* Variable length */
} __packed;
/* Payload of the vring state requests (SET_VRING_NUM / _BASE / _ENABLE) */
struct vhost_user_vring_state {
/* virtqueue index the request applies to */
u32 index;
/* request-specific value: ring size, base offset or enable flag */
u32 num;
} __packed;
/* Payload of VHOST_USER_SET_VRING_ADDR: where the rings of one queue live */
struct vhost_user_vring_addr {
/* virtqueue index the addresses belong to */
u32 index;
u32 flags;
/* addresses of the descriptor table, used ring, avail ring and log */
u64 desc, used, avail, log;
} __packed;
/* One memory region entry of the VHOST_USER_SET_MEM_TABLE payload */
struct vhost_user_mem_region {
u64 guest_addr;
/* length of the region in bytes */
u64 size;
u64 user_addr;
/* offset of the region inside the fd passed along with the message */
u64 mmap_offset;
} __packed;
/* Payload of VHOST_USER_SET_MEM_TABLE: a count followed by the region array */
struct vhost_user_mem_regions {
/* number of valid entries in regions[] */
u32 num;
u32 padding;
struct vhost_user_mem_region regions[2]; /* Currently supporting 2 */
} __packed;
/* All payload layouts this driver sends or receives; header.request selects one */
union vhost_user_payload {
/* plain 64-bit value (features, vring fd messages) */
u64 integer;
struct vhost_user_config config;
struct vhost_user_vring_state vring_state;
struct vhost_user_vring_addr vring_addr;
struct vhost_user_mem_regions mem_regions;
};
/* A complete message: the header immediately followed by the payload (packed) */
struct vhost_user_msg {
struct vhost_user_header header;
union vhost_user_payload payload;
} __packed;
#endif
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Virtio vhost-user driver
*
* Copyright(c) 2019 Intel Corporation
*
* This module allows virtio devices to be used over a vhost-user socket.
*
* Guest devices can be instantiated by kernel module or command line
* parameters. One device will be created for each parameter. Syntax:
*
* [virtio_uml.]device=<socket>:<virtio_id>[:<platform_id>]
* where:
* <socket> := vhost-user socket path to connect
* <virtio_id> := virtio device id (as in virtio_ids.h)
* <platform_id> := (optional) platform device id
*
* example:
* virtio_uml.device=/var/uml.socket:1
*
* Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
*/
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <shared/as-layout.h>
#include <irq_kern.h>
#include <init.h>
#include <os.h>
#include "vhost_user.h"
/* Workaround due to a conflict between irq_user.h and irqreturn.h */
#ifdef IRQ_NONE
#undef IRQ_NONE
#endif
/* Ring size requested from vring_create_virtqueue() for every queue */
#define MAX_SUPPORTED_QUEUE_SIZE 256
/* Map an embedded struct virtio_device back to its struct virtio_uml_device */
#define to_virtio_uml_device(_vdev) \
container_of(_vdev, struct virtio_uml_device, vdev)
/* Per-device state, created in virtio_uml_probe() for each platform device */
struct virtio_uml_device {
/* virtio device registered with the virtio core */
struct virtio_device vdev;
/* platform device this instance was probed for */
struct platform_device *pdev;
/* fd of the vhost-user socket, as returned by os_connect_socket() */
int sock;
/* feature word read with VHOST_USER_GET_FEATURES, updated on finalize */
u64 features;
/* protocol features, masked with VHOST_USER_SUPPORTED_PROTOCOL_F */
u64 protocol_features;
/* virtio status byte; only kept locally, see vu_set_status()/vu_reset() */
u8 status;
};
struct virtio_uml_vq_info {
int kick_fd, call_fd;
char name[32];
};
/* Defined outside this file; vhost_user_set_mem_table() uses them to size the regions */
extern unsigned long long physmem_size, highmem;
/* dev_err() on the platform device behind a struct virtio_uml_device */
#define vu_err(vu_dev, ...) dev_err(&(vu_dev)->pdev->dev, __VA_ARGS__)
/* Vhost-user protocol */
/*
 * Push all @len bytes of @buf out on @fd, resuming after short writes and
 * after -EINTR. The descriptors in @fds travel only with the first chunk
 * that is accepted; follow-up chunks are sent without any.
 *
 * Returns 0 on success or the negative error from os_sendmsg_fds().
 */
static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
			    const int *fds, unsigned int fds_num)
{
	int sent;

	for (;;) {
		sent = os_sendmsg_fds(fd, buf, len, fds, fds_num);
		if (sent > 0) {
			buf += sent;
			len -= sent;
			/* the fds went out with this chunk, do not resend them */
			fds = NULL;
			fds_num = 0;
		}

		if (!len)
			break;
		if (sent < 0 && sent != -EINTR)
			break;
	}

	return sent < 0 ? sent : 0;
}
/*
 * Read exactly @len bytes from @fd into @buf, resuming after short reads
 * and after -EINTR.
 *
 * Returns 0 once everything was read (trivially so for @len == 0), the
 * negative error from os_read_file(), or -ECONNRESET when the other side
 * closed the connection before @len bytes arrived.
 */
static int full_read(int fd, void *buf, int len)
{
	int rc;

	/*
	 * Nothing to read, e.g. a reply without payload. Without this check
	 * the zero-byte read below would return 0 and be misreported as a
	 * closed connection.
	 */
	if (!len)
		return 0;

	do {
		rc = os_read_file(fd, buf, len);
		if (rc > 0) {
			buf += rc;
			len -= rc;
		}
	} while (len && (rc > 0 || rc == -EINTR));

	if (rc < 0)
		return rc;
	if (rc == 0)
		return -ECONNRESET;
	return 0;
}
static int vhost_user_recv_header(struct virtio_uml_device *vu_dev,
struct vhost_user_msg *msg)
{
size_t size = sizeof(msg->header);
int rc;
rc = full_read(vu_dev->sock, (void *) msg, size);
if (rc)
return rc;
if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
return -EPROTO;
return 0;
}
/*
 * Receive one complete reply (header plus payload) into @msg.
 * @max_payload_size is the room available behind the header; a reply that
 * announces more than that is rejected with -EPROTO before any payload
 * byte is read.
 */
static int vhost_user_recv(struct virtio_uml_device *vu_dev,
			   struct vhost_user_msg *msg,
			   size_t max_payload_size)
{
	int err;

	err = vhost_user_recv_header(vu_dev, msg);
	if (err)
		return err;

	if (msg->header.size > max_payload_size)
		return -EPROTO;

	return full_read(vu_dev->sock, (void *) &msg->payload,
			 msg->header.size);
}
/*
 * Receive a reply whose payload must be exactly one u64 and store it in
 * @value. Returns 0, a receive error, or -EPROTO on a size mismatch.
 */
static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
			       u64 *value)
{
	struct vhost_user_msg reply;
	int err;

	err = vhost_user_recv(vu_dev, &reply, sizeof(reply.payload.integer));
	if (err)
		return err;

	if (reply.header.size != sizeof(reply.payload.integer))
		return -EPROTO;

	*value = reply.payload.integer;
	return 0;
}
/*
 * Stamp the protocol version into @msg and send header plus
 * header.size payload bytes, optionally passing @num_fds descriptors.
 */
static int vhost_user_send(struct virtio_uml_device *vu_dev,
			   struct vhost_user_msg *msg,
			   int *fds, size_t num_fds)
{
	size_t total;

	msg->header.flags |= VHOST_USER_VERSION;
	total = sizeof(msg->header) + msg->header.size;

	return full_sendmsg_fds(vu_dev->sock, msg, total, fds, num_fds);
}
/* Send a header-only message carrying @request. */
static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
				      u32 request)
{
	struct vhost_user_msg msg = {
		.header = {
			.request = request,
		},
	};

	return vhost_user_send(vu_dev, &msg, NULL, 0);
}
/* Send @request with a single u64 @value as its payload. */
static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
			       u32 request, u64 value)
{
	struct vhost_user_msg msg = {
		.header = {
			.request = request,
			.size = sizeof(msg.payload.integer),
		},
		.payload = {
			.integer = value,
		},
	};

	return vhost_user_send(vu_dev, &msg, NULL, 0);
}
static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
{
return vhost_user_send_no_payload(vu_dev, VHOST_USER_SET_OWNER);
}
/* Ask for the feature word and wait for the u64 reply in @features. */
static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
				   u64 *features)
{
	int err;

	err = vhost_user_send_no_payload(vu_dev, VHOST_USER_GET_FEATURES);

	return err ? err : vhost_user_recv_u64(vu_dev, features);
}
/* Send the negotiated feature word with VHOST_USER_SET_FEATURES. */
static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
				   u64 features)
{
	const u32 request = VHOST_USER_SET_FEATURES;

	return vhost_user_send_u64(vu_dev, request, features);
}
/* Ask for the protocol feature word and wait for the u64 reply. */
static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
					    u64 *protocol_features)
{
	const u32 request = VHOST_USER_GET_PROTOCOL_FEATURES;
	int err;

	err = vhost_user_send_no_payload(vu_dev, request);

	return err ? err : vhost_user_recv_u64(vu_dev, protocol_features);
}
/* Send the accepted protocol features with VHOST_USER_SET_PROTOCOL_FEATURES. */
static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
					    u64 protocol_features)
{
	const u32 request = VHOST_USER_SET_PROTOCOL_FEATURES;

	return vhost_user_send_u64(vu_dev, request, protocol_features);
}
/*
 * Initial handshake on a freshly connected socket: claim ownership, read
 * the feature word and, if protocol features are offered, negotiate them
 * down to what this driver supports. Returns 0 or the first error.
 */
static int vhost_user_init(struct virtio_uml_device *vu_dev)
{
	int err;

	err = vhost_user_set_owner(vu_dev);
	if (err)
		return err;

	err = vhost_user_get_features(vu_dev, &vu_dev->features);
	if (err)
		return err;

	/* Without this bit there are no protocol features to negotiate. */
	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
		return 0;

	err = vhost_user_get_protocol_features(vu_dev,
					       &vu_dev->protocol_features);
	if (err)
		return err;

	vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;

	return vhost_user_set_protocol_features(vu_dev,
						vu_dev->protocol_features);
}
/*
 * Read @len bytes at @offset of the device config space into @buf.
 *
 * The request always starts at config offset 0 and covers offset + len
 * bytes; the wanted window is then copied out of the reply. Does nothing
 * if VHOST_USER_PROTOCOL_F_CONFIG was not negotiated. On any failure the
 * error is logged and @buf is left untouched.
 */
static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
				  u32 offset, void *buf, u32 len)
{
	u32 cfg_size = offset + len;
	struct vhost_user_msg *msg;
	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
	size_t msg_size = sizeof(msg->header) + payload_size;
	int rc;

	if (!(vu_dev->protocol_features &
	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
		return;

	/*
	 * offset + len wrapped around: the buffer would be too small for
	 * the memcpy() at the end, so refuse the request.
	 */
	if (WARN_ON(cfg_size < offset))
		return;

	msg = kzalloc(msg_size, GFP_KERNEL);
	if (!msg)
		return;

	msg->header.request = VHOST_USER_GET_CONFIG;
	msg->header.size = payload_size;
	msg->payload.config.offset = 0;
	msg->payload.config.size = cfg_size;

	rc = vhost_user_send(vu_dev, msg, NULL, 0);
	if (rc) {
		vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
		       rc);
		goto free;
	}

	/*
	 * Only payload_size bytes are available behind the header; passing
	 * msg_size here would let an oversized reply overrun the allocation
	 * by sizeof(msg->header) bytes.
	 */
	rc = vhost_user_recv(vu_dev, msg, payload_size);
	if (rc) {
		vu_err(vu_dev,
		       "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
		       rc);
		goto free;
	}

	if (msg->header.size != payload_size ||
	    msg->payload.config.size != cfg_size) {
		rc = -EPROTO;
		vu_err(vu_dev,
		       "Invalid VHOST_USER_GET_CONFIG sizes (payload %u expected %zu, config %u expected %u)\n",
		       msg->header.size, payload_size,
		       msg->payload.config.size, cfg_size);
		goto free;
	}
	memcpy(buf, msg->payload.config.payload + offset, len);

free:
	kfree(msg);
}
/*
 * Write @len bytes from @buf to the device config space at @offset.
 * Does nothing unless VHOST_USER_PROTOCOL_F_CONFIG was negotiated; a send
 * failure is logged, an allocation failure is silent.
 */
static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
				  u32 offset, const void *buf, u32 len)
{
	struct vhost_user_msg *msg;
	size_t payload_size = sizeof(msg->payload.config) + len;
	int err;

	if (!(vu_dev->protocol_features &
	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
		return;

	msg = kzalloc(sizeof(msg->header) + payload_size, GFP_KERNEL);
	if (!msg)
		return;

	msg->header.request = VHOST_USER_SET_CONFIG;
	msg->header.size = payload_size;
	msg->payload.config.offset = offset;
	msg->payload.config.size = len;
	memcpy(msg->payload.config.payload, buf, len);

	err = vhost_user_send(vu_dev, msg, NULL, 0);
	if (err)
		vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
		       err);

	kfree(msg);
}
/*
 * Describe the range [@addr, @addr + @size) in @region_out and return the
 * fd backing it in @fd_out. Both the first and the last byte must map to
 * the same fd according to phys_mapping(); otherwise a warning is printed
 * and -EFAULT returned.
 */
static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
				      struct vhost_user_mem_region *region_out)
{
	unsigned long long offset;
	int first_fd, last_fd;

	first_fd = phys_mapping(addr, &offset);
	if (WARN(first_fd < 0, "phys_mapping of 0x%llx returned %d\n",
		 addr, first_fd))
		return -EFAULT;

	*fd_out = first_fd;
	region_out->guest_addr = addr;
	region_out->user_addr = addr;
	region_out->size = size;
	region_out->mmap_offset = offset;

	/* Ensure mapping is valid for the entire region */
	last_fd = phys_mapping(addr + size - 1, &offset);
	if (WARN(last_fd != *fd_out,
		 "phys_mapping of 0x%llx failed: %d != %d\n",
		 addr + size - 1, last_fd, *fd_out))
		return -EFAULT;

	return 0;
}
static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
{
struct vhost_user_msg msg = {
.header.request = VHOST_USER_SET_MEM_TABLE,
.header.size = sizeof(msg.payload.mem_regions),
.payload.mem_regions.num = 1,
};
unsigned long reserved = uml_reserved - uml_physmem;
int fds[2];
int rc;
/*
* This is a bit tricky, see also the comment with setup_physmem().
*
* Essentially, setup_physmem() uses a file to mmap() our physmem,
* but the code and data we *already* have is omitted. To us, this
* is no difference, since they both become part of our address
* space and memory consumption. To somebody looking in from the
* outside, however, it is different because the part of our memory
* consumption that's already part of the binary (code/data) is not
* mapped from the file, so it's not visible to another mmap from
* the file descriptor.
*
* Thus, don't advertise this space to the vhost-user slave. This
* means that the slave will likely abort or similar when we give
* it an address from the hidden range, since it's not marked as
* a valid address, but at least that way we detect the issue and
* don't just have the slave read an all-zeroes buffer from the
* shared memory file, or write something there that we can never
* see (depending on the direction of the virtqueue traffic.)
*
* Since we usually don't want to use .text for virtio buffers,
* this effectively means that you cannot use
* 1) global variables, which are in the .bss and not in the shm
* file-backed memory
* 2) the stack in some processes, depending on where they have
* their stack (or maybe only no interrupt stack?)
*
* The stack is already not typically valid for DMA, so this isn't
* much of a restriction, but global variables might be encountered.
*
* It might be possible to fix it by copying around the data that's
* between bss_start and where we map the file now, but it's not
* something that you typically encounter with virtio drivers, so
* it didn't seem worthwhile.
*/
rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
&fds[0],
&msg.payload.mem_regions.regions[0]);
if (rc < 0)
return rc;
if (highmem) {
msg.payload.mem_regions.num++;
rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
&fds[1], &msg.payload.mem_regions.regions[1]);
if (rc < 0)
return rc;
}
return vhost_user_send(vu_dev, &msg, fds, msg.payload.mem_regions.num);
}
/* Send a vring state message (@request) with @index and @num as payload. */
static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
				      u32 request, u32 index, u32 num)
{
	struct vhost_user_msg msg = {
		.header = {
			.request = request,
			.size = sizeof(msg.payload.vring_state),
		},
		.payload = {
			.vring_state = {
				.index = index,
				.num = num,
			},
		},
	};

	return vhost_user_send(vu_dev, &msg, NULL, 0);
}
/* Tell the other side the ring size @num of queue @index. */
static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
				    u32 index, u32 num)
{
	const u32 request = VHOST_USER_SET_VRING_NUM;

	return vhost_user_set_vring_state(vu_dev, request, index, num);
}
/* Set the base @offset of queue @index via VHOST_USER_SET_VRING_BASE. */
static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
				     u32 index, u32 offset)
{
	const u32 request = VHOST_USER_SET_VRING_BASE;

	return vhost_user_set_vring_state(vu_dev, request, index, offset);
}
/*
 * Send the addresses of the descriptor table, used ring, avail ring and
 * log of queue @index. The flags field of the payload stays zero.
 */
static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
				     u32 index, u64 desc, u64 used, u64 avail,
				     u64 log)
{
	struct vhost_user_msg msg = {
		.header = {
			.request = VHOST_USER_SET_VRING_ADDR,
			.size = sizeof(msg.payload.vring_addr),
		},
		.payload = {
			.vring_addr = {
				.index = index,
				.desc = desc,
				.used = used,
				.avail = avail,
				.log = log,
			},
		},
	};

	return vhost_user_send(vu_dev, &msg, NULL, 0);
}
/*
 * Hand @fd for queue @index to the other side using @request. The payload
 * is the queue index; a negative @fd sends no descriptor and sets
 * VHOST_USER_VRING_POLL_MASK in the payload instead. An @index that does
 * not fit VHOST_USER_VRING_INDEX_MASK yields -EINVAL.
 */
static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
				   u32 request, int index, int fd)
{
	struct vhost_user_msg msg = {
		.header = {
			.request = request,
			.size = sizeof(msg.payload.integer),
		},
		.payload = {
			.integer = index,
		},
	};

	if (index & ~VHOST_USER_VRING_INDEX_MASK)
		return -EINVAL;

	if (fd >= 0)
		return vhost_user_send(vu_dev, &msg, &fd, 1);

	msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
	return vhost_user_send(vu_dev, &msg, NULL, 0);
}
static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
int index, int fd)
{
return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
index, fd);
}
static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
int index, int fd)
{
return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
index, fd);
}
/*
 * Enable or disable queue @index. The message is only sent when
 * VHOST_USER_F_PROTOCOL_FEATURES is set in the device features; otherwise
 * this is a successful no-op.
 */
static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
				       u32 index, bool enable)
{
	bool have_protocol_features =
		vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES);

	if (have_protocol_features)
		return vhost_user_set_vring_state(vu_dev,
						  VHOST_USER_SET_VRING_ENABLE,
						  index, enable);
	return 0;
}
/* Virtio interface */
/*
 * Kick the other side for @vq by writing the 64-bit value 1 to the
 * queue's kick fd, retrying on -EINTR. Warns and returns false if the
 * write did not transfer the full value.
 */
static bool vu_notify(struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;
	const uint64_t kick = 1;
	int written;

	for (;;) {
		written = os_write_file(info->kick_fd, &kick, sizeof(kick));
		if (written != -EINTR)
			break;
	}

	return !WARN(written != sizeof(kick), "write returned %d\n", written);
}
/*
 * IRQ handler for the call fd of one virtqueue (@opaque). Every complete
 * 64-bit value read from the fd triggers one vring_interrupt(); reading
 * goes on until the fd reports something other than a full value or
 * -EINTR. Anything but -EAGAIN at that point is warned about.
 */
static irqreturn_t vu_interrupt(int irq, void *opaque)
{
	struct virtqueue *vq = opaque;
	struct virtio_uml_vq_info *info = vq->priv;
	irqreturn_t handled = IRQ_NONE;
	uint64_t event;
	int got;

	for (;;) {
		got = os_read_file(info->call_fd, &event, sizeof(event));
		if (got == sizeof(event)) {
			handled |= vring_interrupt(irq, vq);
			continue;
		}
		if (got != -EINTR)
			break;
	}

	WARN(got != -EAGAIN, "read returned %d\n", got);
	return handled;
}
/* virtio_config_ops.get: read config space through vhost-user. */
static void vu_get(struct virtio_device *vdev, unsigned offset,
		   void *buf, unsigned len)
{
	vhost_user_get_config(to_virtio_uml_device(vdev), offset, buf, len);
}
/* virtio_config_ops.set: write config space through vhost-user. */
static void vu_set(struct virtio_device *vdev, unsigned offset,
		   const void *buf, unsigned len)
{
	vhost_user_set_config(to_virtio_uml_device(vdev), offset, buf, len);
}
static u8 vu_get_status(struct virtio_device *vdev)
{
struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
return vu_dev->status;
}
/* virtio_config_ops.set_status: remember @status locally, nothing is sent. */
static void vu_set_status(struct virtio_device *vdev, u8 status)
{
	to_virtio_uml_device(vdev)->status = status;
}
static void vu_reset(struct virtio_device *vdev)
{
struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
vu_dev->status = 0;
}
/*
 * Tear down one virtqueue: release its IRQ, close the call and kick fds,
 * delete the vring and free the per-queue info.
 */
static void vu_del_vq(struct virtqueue *vq)
{
	struct virtio_uml_vq_info *vq_info = vq->priv;

	um_free_irq(VIRTIO_IRQ, vq);

	os_close_file(vq_info->call_fd);
	os_close_file(vq_info->kick_fd);

	vring_del_virtqueue(vq);
	kfree(vq_info);
}
/*
 * virtio_config_ops.del_vqs: disable every queue on the other side, wait
 * for it to catch up, then tear the queues down locally.
 */
static void vu_del_vqs(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	struct virtqueue *vq, *next;
	u64 unused_features;

	/* Note: reverse order as a workaround to a decoding bug in snabb */
	list_for_each_entry_reverse(vq, &vdev->vqs, list)
		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));

	/*
	 * Ensure previous messages have been processed: this request has a
	 * reply, the value itself is not needed.
	 */
	WARN_ON(vhost_user_get_features(vu_dev, &unused_features));

	list_for_each_entry_safe(vq, next, &vdev->vqs, list)
		vu_del_vq(vq);
}
/*
 * Create the call channel of @vq: a pipe whose read end is stored in the
 * queue info and hooked up to VIRTIO_IRQ, and whose write end is passed to
 * the other side. Our copy of the write end is always closed before
 * returning; on failure the read end and the IRQ are released as well.
 */
static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
			       struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;
	int pipe_fds[2];
	int err;

	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
	err = os_pipe(pipe_fds, true, true);
	if (err < 0)
		return err;

	info->call_fd = pipe_fds[0];
	err = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ,
			     vu_interrupt, IRQF_SHARED, info->name, vq);
	if (!err) {
		err = vhost_user_set_vring_call(vu_dev, vq->index,
						pipe_fds[1]);
		if (err)
			um_free_irq(VIRTIO_IRQ, vq);
	}

	if (err)
		os_close_file(pipe_fds[0]);

	/* Close (unused) write end of call fds */
	os_close_file(pipe_fds[1]);

	return err;
}
/*
 * Create virtqueue @index for @vdev and describe it to the other side:
 * allocate the per-queue info and the vring, create the kick eventfd and
 * the call channel, then send ring size, base and ring addresses.
 *
 * Returns the new virtqueue or an ERR_PTR(); everything set up so far is
 * undone on failure.
 */
static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
				     unsigned index, vq_callback_t *callback,
				     const char *name, bool ctx)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	struct platform_device *pdev = vu_dev->pdev;
	struct virtio_uml_vq_info *info;
	struct virtqueue *vq;
	int ring_size;
	int err;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return ERR_PTR(-ENOMEM);

	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
		 pdev->id, name);

	vq = vring_create_virtqueue(index, MAX_SUPPORTED_QUEUE_SIZE, PAGE_SIZE,
				    vdev, true, true, ctx, vu_notify, callback,
				    info->name);
	if (!vq) {
		err = -ENOMEM;
		goto free_info;
	}
	vq->priv = info;
	/* use the size the vring actually ended up with */
	ring_size = virtqueue_get_vring_size(vq);

	err = os_eventfd(0, 0);
	if (err < 0)
		goto del_vring;
	info->kick_fd = err;

	err = vu_setup_vq_call_fd(vu_dev, vq);
	if (err)
		goto close_kick;

	err = vhost_user_set_vring_num(vu_dev, index, ring_size);
	if (!err)
		err = vhost_user_set_vring_base(vu_dev, index, 0);
	if (!err)
		err = vhost_user_set_vring_addr(vu_dev, index,
						virtqueue_get_desc_addr(vq),
						virtqueue_get_used_addr(vq),
						virtqueue_get_avail_addr(vq),
						(u64) -1);
	if (err)
		goto undo_call;

	return vq;

undo_call:
	um_free_irq(VIRTIO_IRQ, vq);
	os_close_file(info->call_fd);
close_kick:
	os_close_file(info->kick_fd);
del_vring:
	vring_del_virtqueue(vq);
free_info:
	kfree(info);
	return ERR_PTR(err);
}
/*
 * virtio_config_ops.find_vqs: send the memory table, create a virtqueue
 * for every named entry (unnamed entries get NULL and consume no queue
 * index), then pass the kick fds and enable the queues. On failure all
 * queues created so far are deleted again.
 */
static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
		       const char * const names[], const bool *ctx,
		       struct irq_affinity *desc)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	struct virtqueue *vq;
	int next_index = 0;
	int i, err;

	err = vhost_user_set_mem_table(vu_dev);
	if (err)
		return err;

	for (i = 0; i < nvqs; ++i) {
		if (names[i]) {
			vqs[i] = vu_setup_vq(vdev, next_index++, callbacks[i],
					     names[i], ctx ? ctx[i] : false);
			if (IS_ERR(vqs[i])) {
				err = PTR_ERR(vqs[i]);
				goto delete_all;
			}
		} else {
			vqs[i] = NULL;
		}
	}

	list_for_each_entry(vq, &vdev->vqs, list) {
		struct virtio_uml_vq_info *info = vq->priv;

		err = vhost_user_set_vring_kick(vu_dev, vq->index,
						info->kick_fd);
		if (err)
			goto delete_all;

		err = vhost_user_set_vring_enable(vu_dev, vq->index, true);
		if (err)
			goto delete_all;
	}

	return 0;

delete_all:
	vu_del_vqs(vdev);
	return err;
}
static u64 vu_get_features(struct virtio_device *vdev)
{
struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
return vu_dev->features;
}
/*
 * virtio_config_ops.finalize_features: let the vring code adjust the
 * transport feature bits, add back the vhost-user transport bits this
 * driver supports, and send the result to the other side.
 */
static int vu_finalize_features(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	u64 own_transport_bits;

	/*
	 * Sampled before vring_transport_features() runs; presumably that
	 * call may drop bits it does not know about (TODO confirm).
	 */
	own_transport_bits = vdev->features & VHOST_USER_SUPPORTED_F;

	vring_transport_features(vdev);
	vu_dev->features = own_transport_bits | vdev->features;

	return vhost_user_set_features(vu_dev, vu_dev->features);
}
static const char *vu_bus_name(struct virtio_device *vdev)
{
struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
return vu_dev->pdev->name;
}
/*
 * Transport callbacks of this driver; virtio_uml_probe() installs the
 * table as vdev.config before registering the virtio device.
 */
static const struct virtio_config_ops virtio_uml_config_ops = {
.get = vu_get,
.set = vu_set,
.get_status = vu_get_status,
.set_status = vu_set_status,
.reset = vu_reset,
.find_vqs = vu_find_vqs,
.del_vqs = vu_del_vqs,
.get_features = vu_get_features,
.finalize_features = vu_finalize_features,
.bus_name = vu_bus_name,
};
/*
 * Release callback of the embedded virtio device, run when the last
 * reference to it is dropped. It closes the vhost-user socket and frees
 * the virtio_uml_device allocated in virtio_uml_probe(); the structure
 * must live exactly as long as the refcounted device inside it.
 */
static void virtio_uml_release_dev(struct device *d)
{
	struct virtio_device *vdev =
			container_of(d, struct virtio_device, dev);
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	os_close_file(vu_dev->sock);
	kfree(vu_dev);
}

/* Platform device */

/* Platform data attached to every "virtio-uml" platform device */
struct virtio_uml_platform_data {
	u32 virtio_device_id;		/* virtio device id (virtio_ids.h) */
	const char *socket_path;	/* vhost-user socket to connect to */
};

/*
 * Probe one "virtio-uml" platform device: connect to the vhost-user socket
 * named in the platform data, run the initial handshake and register the
 * virtio device.
 *
 * The device structure is allocated with kzalloc() instead of devres
 * because it embeds a struct device with its own release callback: devres
 * would free the memory on driver unbind even while references to the
 * virtio device are still held. Before registration the error paths free
 * it directly; afterwards virtio_uml_release_dev() owns it.
 */
static int virtio_uml_probe(struct platform_device *pdev)
{
	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
	struct virtio_uml_device *vu_dev;
	int rc;

	if (!pdata)
		return -EINVAL;

	vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL);
	if (!vu_dev)
		return -ENOMEM;

	vu_dev->vdev.dev.parent = &pdev->dev;
	vu_dev->vdev.dev.release = virtio_uml_release_dev;
	vu_dev->vdev.config = &virtio_uml_config_ops;
	vu_dev->vdev.id.device = pdata->virtio_device_id;
	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
	vu_dev->pdev = pdev;

	do {
		rc = os_connect_socket(pdata->socket_path);
	} while (rc == -EINTR);
	if (rc < 0)
		goto error_free;
	vu_dev->sock = rc;

	rc = vhost_user_init(vu_dev);
	if (rc)
		goto error_init;

	platform_set_drvdata(pdev, vu_dev);

	rc = register_virtio_device(&vu_dev->vdev);
	if (rc)
		/* drops the reference; the release callback cleans up */
		put_device(&vu_dev->vdev.dev);
	return rc;

error_init:
	os_close_file(vu_dev->sock);
error_free:
	kfree(vu_dev);
	return rc;
}
/* Platform driver remove: unregister the virtio device stored as drvdata. */
static int virtio_uml_remove(struct platform_device *pdev)
{
	struct virtio_uml_device *vu_dev;

	vu_dev = platform_get_drvdata(pdev);
	unregister_virtio_device(&vu_dev->vdev);

	return 0;
}
/* Command line device list */
/* Release callback of the statically allocated parent device: nothing to free */
static void vu_cmdline_release_dev(struct device *d)
{
}
/* Parent of all platform devices created from the "device" parameter */
static struct device vu_cmdline_parent = {
.init_name = "virtio-uml-cmdline",
.release = vu_cmdline_release_dev,
};
/* Set once vu_cmdline_parent was registered, cleared again on unregister */
static bool vu_cmdline_parent_registered;
/* Platform device id used for the next command line device */
static int vu_cmdline_id;
/*
 * Setter of the "device" parameter: parse
 * <socket>:<virtio_id>[:<platform_id>] and register a "virtio-uml"
 * platform device for it, below a parent device that is registered on
 * first use.
 *
 * The socket path is duplicated and stays referenced from the platform
 * data; it is freed here only if registering the platform device fails.
 * Returns 0 or a negative errno.
 */
static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
{
	const char *ids = strchr(device, ':');
	unsigned int virtio_device_id;
	int processed, consumed, err;
	char *socket_path;
	struct virtio_uml_platform_data pdata;
	struct platform_device *pdev;

	/* need a non-empty socket path in front of the first ':' */
	if (!ids || ids == device)
		return -EINVAL;

	/*
	 * An explicit platform id overwrites vu_cmdline_id; %n records how
	 * far parsing got, so trailing garbage can be rejected.
	 */
	processed = sscanf(ids, ":%u%n:%d%n",
			   &virtio_device_id, &consumed,
			   &vu_cmdline_id, &consumed);

	if (processed < 1 || ids[consumed])
		return -EINVAL;

	if (!vu_cmdline_parent_registered) {
		err = device_register(&vu_cmdline_parent);
		if (err) {
			pr_err("Failed to register parent device!\n");
			put_device(&vu_cmdline_parent);
			return err;
		}
		vu_cmdline_parent_registered = true;
	}

	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
	if (!socket_path)
		return -ENOMEM;

	pdata.virtio_device_id = (u32) virtio_device_id;
	pdata.socket_path = socket_path;

	/* the virtio id is unsigned, so print it with %u */
	pr_info("Registering device virtio-uml.%d id=%u at %s\n",
		vu_cmdline_id, virtio_device_id, socket_path);

	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
					     vu_cmdline_id++, &pdata,
					     sizeof(pdata));
	err = PTR_ERR_OR_ZERO(pdev);
	if (err)
		goto free;
	return 0;

free:
	kfree(socket_path);
	return err;
}
/*
 * device_for_each_child() callback: append one
 * "<socket>:<virtio_id>:<platform_id>" line for @dev to the string in
 * @data, which is treated as a PAGE_SIZE buffer. Always returns 0 so the
 * iteration continues.
 */
static int vu_cmdline_get_device(struct device *dev, void *data)
{
	struct platform_device *pdev = to_platform_device(dev);
	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
	char *buffer = data;
	unsigned int len = strlen(buffer);

	/* virtio_device_id is a u32, hence %u; the platform id is an int */
	snprintf(buffer + len, PAGE_SIZE - len, "%s:%u:%d\n",
		 pdata->socket_path, pdata->virtio_device_id, pdev->id);
	return 0;
}
/*
 * Getter of the "device" parameter: list every registered command line
 * device, one per line. Returns the string length plus one.
 */
static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
{
	/* start from an empty string; the callback appends to it */
	*buffer = '\0';

	if (!vu_cmdline_parent_registered)
		return strlen(buffer) + 1;

	device_for_each_child(&vu_cmdline_parent, buffer,
			      vu_cmdline_get_device);

	return strlen(buffer) + 1;
}
/* Set/get handlers behind the "device" parameter */
static const struct kernel_param_ops vu_cmdline_param_ops = {
.set = vu_cmdline_set,
.get = vu_cmdline_get,
};
/* Expose "device" as a parameter that only its owner may read (S_IRUSR) */
device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
/* Help text for the parameter */
__uml_help(vu_cmdline_param_ops,
"virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
" Configure a virtio device over a vhost-user socket.\n"
" See virtio_ids.h for a list of possible virtio device id values.\n"
" Optionally use a specific platform_device id.\n\n"
);
/*
 * device_for_each_child() callback: free the socket path duplicated in
 * vu_cmdline_set() and unregister the platform device. Always returns 0.
 */
static int vu_unregister_cmdline_device(struct device *dev, void *data)
{
	struct platform_device *child = to_platform_device(dev);
	struct virtio_uml_platform_data *child_data =
		child->dev.platform_data;

	kfree(child_data->socket_path);
	platform_device_unregister(child);

	return 0;
}
/*
 * Remove all devices created from the command line together with their
 * parent device. A no-op if the parent was never registered.
 */
static void vu_unregister_cmdline_devices(void)
{
	if (!vu_cmdline_parent_registered)
		return;

	device_for_each_child(&vu_cmdline_parent, NULL,
			      vu_unregister_cmdline_device);
	device_unregister(&vu_cmdline_parent);
	vu_cmdline_parent_registered = false;
}
/* Platform driver */
/* Device tree match table: binds to nodes compatible with "virtio,uml" */
static const struct of_device_id virtio_uml_match[] = {
{ .compatible = "virtio,uml", },
/* sentinel */
{ }
};
MODULE_DEVICE_TABLE(of, virtio_uml_match);
/*
 * Platform driver; its name matches the "virtio-uml" platform devices that
 * vu_cmdline_set() registers.
 */
static struct platform_driver virtio_uml_driver = {
.probe = virtio_uml_probe,
.remove = virtio_uml_remove,
.driver = {
.name = "virtio-uml",
.of_match_table = virtio_uml_match,
},
};
/* Module init: register the platform driver. */
static int __init virtio_uml_init(void)
{
	int err = platform_driver_register(&virtio_uml_driver);

	return err;
}
/*
 * Module exit: unregister the driver first, then remove the devices that
 * were created from the command line.
 */
static void __exit virtio_uml_exit(void)
{
	platform_driver_unregister(&virtio_uml_driver);

	vu_unregister_cmdline_devices();
}
module_init(virtio_uml_init);
module_exit(virtio_uml_exit);
__uml_exitcall(virtio_uml_exit);
MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
MODULE_LICENSE("GPL");
...@@ -17,17 +17,18 @@ ...@@ -17,17 +17,18 @@
#define TELNETD_IRQ 12 #define TELNETD_IRQ 12
#define XTERM_IRQ 13 #define XTERM_IRQ 13
#define RANDOM_IRQ 14 #define RANDOM_IRQ 14
#define VIRTIO_IRQ 15
#ifdef CONFIG_UML_NET_VECTOR #ifdef CONFIG_UML_NET_VECTOR
#define VECTOR_BASE_IRQ 15 #define VECTOR_BASE_IRQ (VIRTIO_IRQ + 1)
#define VECTOR_IRQ_SPACE 8 #define VECTOR_IRQ_SPACE 8
#define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ - 1) #define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ - 1)
#else #else
#define LAST_IRQ RANDOM_IRQ #define LAST_IRQ VIRTIO_IRQ
#endif #endif
......
...@@ -36,6 +36,8 @@ ...@@ -36,6 +36,8 @@
#define OS_LIB_PATH "/usr/lib/" #define OS_LIB_PATH "/usr/lib/"
#endif #endif
#define OS_SENDMSG_MAX_FDS 8
/* /*
* types taken from stat_file() in hostfs_user.c * types taken from stat_file() in hostfs_user.c
* (if they are wrong here, they are wrong there...). * (if they are wrong here, they are wrong there...).
...@@ -176,6 +178,9 @@ extern unsigned os_major(unsigned long long dev); ...@@ -176,6 +178,9 @@ extern unsigned os_major(unsigned long long dev);
extern unsigned os_minor(unsigned long long dev); extern unsigned os_minor(unsigned long long dev);
extern unsigned long long os_makedev(unsigned major, unsigned minor); extern unsigned long long os_makedev(unsigned major, unsigned minor);
extern int os_falloc_punch(int fd, unsigned long long offset, int count); extern int os_falloc_punch(int fd, unsigned long long offset, int count);
extern int os_eventfd(unsigned int initval, int flags);
extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len,
const int *fds, unsigned int fds_num);
/* start_up.c */ /* start_up.c */
extern void os_early_checks(void); extern void os_early_checks(void);
......
...@@ -38,6 +38,8 @@ EXPORT_SYMBOL(run_helper); ...@@ -38,6 +38,8 @@ EXPORT_SYMBOL(run_helper);
EXPORT_SYMBOL(os_major); EXPORT_SYMBOL(os_major);
EXPORT_SYMBOL(os_minor); EXPORT_SYMBOL(os_minor);
EXPORT_SYMBOL(os_makedev); EXPORT_SYMBOL(os_makedev);
EXPORT_SYMBOL(os_eventfd);
EXPORT_SYMBOL(os_sendmsg_fds);
EXPORT_SYMBOL(add_sigio_fd); EXPORT_SYMBOL(add_sigio_fd);
EXPORT_SYMBOL(ignore_sigio_fd); EXPORT_SYMBOL(ignore_sigio_fd);
......
...@@ -31,6 +31,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD]; ...@@ -31,6 +31,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD];
/* Initialized at boot time, and readonly after that */ /* Initialized at boot time, and readonly after that */
unsigned long long highmem; unsigned long long highmem;
EXPORT_SYMBOL(highmem);
int kmalloc_ok = 0; int kmalloc_ok = 0;
/* Used during early boot */ /* Used during early boot */
......
...@@ -143,6 +143,7 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out) ...@@ -143,6 +143,7 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out)
return fd; return fd;
} }
EXPORT_SYMBOL(phys_mapping);
static int __init uml_mem_setup(char *line, int *add) static int __init uml_mem_setup(char *line, int *add)
{ {
......
...@@ -113,6 +113,7 @@ static int have_root __initdata = 0; ...@@ -113,6 +113,7 @@ static int have_root __initdata = 0;
/* Set in uml_mem_setup and modified in linux_main */ /* Set in uml_mem_setup and modified in linux_main */
long long physmem_size = 32 * 1024 * 1024; long long physmem_size = 32 * 1024 * 1024;
EXPORT_SYMBOL(physmem_size);
static const char *usage_string = static const char *usage_string =
"User Mode Linux v%s\n" "User Mode Linux v%s\n"
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <sys/sysmacros.h> #include <sys/sysmacros.h>
#include <sys/un.h> #include <sys/un.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/eventfd.h>
#include <os.h> #include <os.h>
static void copy_stat(struct uml_stat *dst, const struct stat64 *src) static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
...@@ -620,3 +621,46 @@ int os_falloc_punch(int fd, unsigned long long offset, int len) ...@@ -620,3 +621,46 @@ int os_falloc_punch(int fd, unsigned long long offset, int len)
return n; return n;
} }
/*
 * Thin wrapper around eventfd(2) with kernel-style error reporting:
 * returns the new descriptor, or -errno if eventfd() failed.
 */
int os_eventfd(unsigned int initval, int flags)
{
	int fd = eventfd(initval, flags);

	return fd < 0 ? -errno : fd;
}
/*
 * Send @len bytes from @buf on socket @fd and pass the @fds_num file
 * descriptors in @fds along in an SCM_RIGHTS control message.
 *
 * The control message is only built when there are descriptors to pass.
 * With fds_num == 0 callers hand in fds == NULL, and copying from a NULL
 * pointer is undefined even for a length of zero; an empty SCM_RIGHTS
 * message would carry nothing anyway.
 *
 * Returns the number of bytes sent, -EINVAL for more than
 * OS_SENDMSG_MAX_FDS descriptors or a NULL @fds with a non-zero count, or
 * -errno if sendmsg() failed.
 */
int os_sendmsg_fds(int fd, const void *buf, unsigned int len, const int *fds,
		   unsigned int fds_num)
{
	struct iovec iov = {
		.iov_base = (void *) buf,
		.iov_len = len,
	};
	/* union with a cmsghdr so the control buffer is suitably aligned */
	union {
		char control[CMSG_SPACE(sizeof(*fds) * OS_SENDMSG_MAX_FDS)];
		struct cmsghdr align;
	} u;
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
	};
	int err;

	if (fds_num > OS_SENDMSG_MAX_FDS)
		return -EINVAL;
	if (fds_num && !fds)
		return -EINVAL;

	if (fds_num) {
		unsigned int fds_size = sizeof(*fds) * fds_num;
		struct cmsghdr *cmsg;

		memset(u.control, 0, sizeof(u.control));
		msg.msg_control = u.control;
		msg.msg_controllen = CMSG_SPACE(fds_size);

		cmsg = CMSG_FIRSTHDR(&msg);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SCM_RIGHTS;
		cmsg->cmsg_len = CMSG_LEN(fds_size);
		memcpy(CMSG_DATA(cmsg), fds, fds_size);
	}

	err = sendmsg(fd, &msg, 0);
	if (err < 0)
		return -errno;
	return err;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment