Commit e9589300 authored by Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull vhost cleanups and fixes from Michael Tsirkin:
 "Here are vhost cleanups and fixes by Asias He and myself.  They affect
  both vhost-net and vhost-scsi devices.  They also *depend* on both
  net-next and target-pending, where the net and target commits these
  changes depend on are already merged.  So merging through the common
  vhost tree."

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  vhost_scsi: module rename
  tcm_vhost: header split up
  vhost: src file renames
  vhost: fix error handling in RESET_OWNER ioctl
  tcm_vhost: remove virtio-net.h dependency
  vhost: move per-vq net specific fields out to net
  tcm_vhost: document inflight ref-counting use
  vhost: move vhost-net zerocopy fields to net.c
  tcm_vhost: Wait for pending requests in vhost_scsi_flush()
  vhost: Allow device specific fields per vq
parents 5a148af6 181c04a3
drivers/vhost/Kconfig:
@@ -9,6 +9,10 @@ config VHOST_NET
 	  To compile this driver as a module, choose M here: the module will
 	  be called vhost_net.
 
-if STAGING
-source "drivers/vhost/Kconfig.tcm"
-endif
+config VHOST_SCSI
+	tristate "VHOST_SCSI TCM fabric driver"
+	depends on TARGET_CORE && EVENTFD && m
+	default n
+	---help---
+	Say M here to enable the vhost_scsi TCM fabric module
+	for use with virtio-scsi guests

drivers/vhost/Kconfig.tcm (deleted):
-config TCM_VHOST
-	tristate "TCM_VHOST fabric module"
-	depends on TARGET_CORE && EVENTFD && m
-	default n
-	---help---
-	Say M here to enable the TCM_VHOST fabric module for use with virtio-scsi guests

drivers/vhost/Makefile:
 obj-$(CONFIG_VHOST_NET) += vhost_net.o
 vhost_net-y := vhost.o net.o
-obj-$(CONFIG_TCM_VHOST) += tcm_vhost.o
+obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o
+vhost_scsi-y := scsi.o

drivers/vhost/net.c:
@@ -64,9 +64,35 @@ enum {
 	VHOST_NET_VQ_MAX = 2,
 };
struct vhost_ubuf_ref {
struct kref kref;
wait_queue_head_t wait;
struct vhost_virtqueue *vq;
};
struct vhost_net_virtqueue {
struct vhost_virtqueue vq;
/* hdr is used to store the virtio header.
* Since each iovec has >= 1 byte length, we never need more than
* header length entries to store the header. */
struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)];
size_t vhost_hlen;
size_t sock_hlen;
/* vhost zerocopy support fields below: */
/* last used idx for outstanding DMA zerocopy buffers */
int upend_idx;
/* first used idx for DMA done zerocopy buffers */
int done_idx;
/* an array of userspace buffers info */
struct ubuf_info *ubuf_info;
/* Reference counting for outstanding ubufs.
* Protected by vq mutex. Writers must also take device mutex. */
struct vhost_ubuf_ref *ubufs;
};
 struct vhost_net {
 	struct vhost_dev dev;
-	struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
+	struct vhost_net_virtqueue vqs[VHOST_NET_VQ_MAX];
 	struct vhost_poll poll[VHOST_NET_VQ_MAX];
 	/* Number of TX recently submitted.
 	 * Protected by tx vq lock. */
@@ -78,6 +104,90 @@ struct vhost_net {
 	bool tx_flush;
 };
static unsigned vhost_zcopy_mask __read_mostly;
void vhost_enable_zcopy(int vq)
{
vhost_zcopy_mask |= 0x1 << vq;
}
static void vhost_zerocopy_done_signal(struct kref *kref)
{
struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref,
kref);
wake_up(&ubufs->wait);
}
struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq,
bool zcopy)
{
struct vhost_ubuf_ref *ubufs;
/* No zero copy backend? Nothing to count. */
if (!zcopy)
return NULL;
ubufs = kmalloc(sizeof(*ubufs), GFP_KERNEL);
if (!ubufs)
return ERR_PTR(-ENOMEM);
kref_init(&ubufs->kref);
init_waitqueue_head(&ubufs->wait);
ubufs->vq = vq;
return ubufs;
}
void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs)
{
kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
}
void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs)
{
kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
kfree(ubufs);
}
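The helpers above are the zerocopy accounting scheme in miniature: the kref starts at 1, every in-flight zerocopy buffer holds one extra reference, and the flush path drops the initial reference and sleeps until the count reaches zero. A minimal kernel-style sketch of the same shape follows; the pending_ref/pending_* names are illustrative, not part of this patch.

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/wait.h>

/* Illustrative only -- same shape as vhost_ubuf_ref above. */
struct pending_ref {
	struct kref kref;		/* starts at 1; +1 per in-flight buffer */
	wait_queue_head_t wait;		/* flusher sleeps here */
};

static void pending_done(struct kref *kref)
{
	struct pending_ref *p = container_of(kref, struct pending_ref, kref);

	wake_up(&p->wait);
}

static struct pending_ref *pending_alloc(void)
{
	struct pending_ref *p = kmalloc(sizeof(*p), GFP_KERNEL);

	if (!p)
		return NULL;
	kref_init(&p->kref);		/* the reference the flusher will drop */
	init_waitqueue_head(&p->wait);
	return p;
}

/* Completion path: one call per finished buffer. */
static void pending_put(struct pending_ref *p)
{
	kref_put(&p->kref, pending_done);
}

/* Flush path: drop the initial reference, then wait for the rest
 * (kref.refcount is an atomic_t in kernels of this vintage). */
static void pending_put_and_wait(struct pending_ref *p)
{
	kref_put(&p->kref, pending_done);
	wait_event(p->wait, !atomic_read(&p->kref.refcount));
	kfree(p);
}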
int vhost_net_set_ubuf_info(struct vhost_net *n)
{
bool zcopy;
int i;
for (i = 0; i < n->dev.nvqs; ++i) {
zcopy = vhost_zcopy_mask & (0x1 << i);
if (!zcopy)
continue;
n->vqs[i].ubuf_info = kmalloc(sizeof(*n->vqs[i].ubuf_info) *
UIO_MAXIOV, GFP_KERNEL);
if (!n->vqs[i].ubuf_info)
goto err;
}
return 0;
err:
while (i--) {
zcopy = vhost_zcopy_mask & (0x1 << i);
if (!zcopy)
continue;
kfree(n->vqs[i].ubuf_info);
}
return -ENOMEM;
}
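vhost_net_set_ubuf_info() above uses the usual partial-unwind idiom: on a failed allocation, free only the entries filled in before the failing index (and skip the queues that never get zerocopy buffers). A stripped-down sketch of just the unwind, with hypothetical names:

#include <linux/slab.h>

/* Free only what was successfully allocated before the failure. */
static int alloc_all(void **slots, int n, size_t size)
{
	int i;

	for (i = 0; i < n; i++) {
		slots[i] = kmalloc(size, GFP_KERNEL);
		if (!slots[i])
			goto err;
	}
	return 0;
err:
	while (i--)		/* visits exactly the indices already filled in */
		kfree(slots[i]);
	return -ENOMEM;
}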
void vhost_net_vq_reset(struct vhost_net *n)
{
int i;
for (i = 0; i < VHOST_NET_VQ_MAX; i++) {
n->vqs[i].done_idx = 0;
n->vqs[i].upend_idx = 0;
n->vqs[i].ubufs = NULL;
kfree(n->vqs[i].ubuf_info);
n->vqs[i].ubuf_info = NULL;
n->vqs[i].vhost_hlen = 0;
n->vqs[i].sock_hlen = 0;
}
}
 static void vhost_net_tx_packet(struct vhost_net *net)
 {
 	++net->tx_packets;
@@ -153,10 +263,12 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
 static int vhost_zerocopy_signal_used(struct vhost_net *net,
 				      struct vhost_virtqueue *vq)
 {
+	struct vhost_net_virtqueue *nvq =
+		container_of(vq, struct vhost_net_virtqueue, vq);
 	int i;
 	int j = 0;
 
-	for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
+	for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
 		if (vq->heads[i].len == VHOST_DMA_FAILED_LEN)
 			vhost_net_tx_err(net);
 		if (VHOST_DMA_IS_DONE(vq->heads[i].len)) {
@@ -168,7 +280,7 @@ static int vhost_zerocopy_signal_used(struct vhost_net *net,
 		break;
 	}
 	if (j)
-		vq->done_idx = i;
+		nvq->done_idx = i;
 	return j;
 }
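The scan above is what keeps zerocopy completions in order: done_idx only advances over a contiguous prefix of finished buffers and stops at the first entry still owned by the lower device. A standalone sketch of that loop, with illustrative names and a simplified DONE marker:

#define RING_SIZE	1024		/* stands in for UIO_MAXIOV */
#define LEN_DONE	0xffffffffu	/* stands in for VHOST_DMA_DONE_LEN */

/* Advance *done_idx over the contiguous prefix of completed entries and
 * return how many were released; same shape as the loop above. */
static int signal_completed(unsigned int *lens, int *done_idx, int upend_idx)
{
	int i, j = 0;

	for (i = *done_idx; i != upend_idx; i = (i + 1) % RING_SIZE) {
		if (lens[i] != LEN_DONE)
			break;		/* still in flight: stop to preserve order */
		lens[i] = 0;		/* slot can be reused by the submit side */
		++j;
	}
	if (j)
		*done_idx = i;
	return j;
}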
...@@ -198,7 +310,8 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) ...@@ -198,7 +310,8 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
* read-size critical section for our kind of RCU. */ * read-size critical section for our kind of RCU. */
static void handle_tx(struct vhost_net *net) static void handle_tx(struct vhost_net *net)
{ {
struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX]; struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
struct vhost_virtqueue *vq = &nvq->vq;
unsigned out, in, s; unsigned out, in, s;
int head; int head;
struct msghdr msg = { struct msghdr msg = {
...@@ -224,8 +337,8 @@ static void handle_tx(struct vhost_net *net) ...@@ -224,8 +337,8 @@ static void handle_tx(struct vhost_net *net)
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
vhost_disable_notify(&net->dev, vq); vhost_disable_notify(&net->dev, vq);
hdr_size = vq->vhost_hlen; hdr_size = nvq->vhost_hlen;
zcopy = vq->ubufs; zcopy = nvq->ubufs;
for (;;) { for (;;) {
/* Release DMAs done buffers first */ /* Release DMAs done buffers first */
...@@ -246,9 +359,10 @@ static void handle_tx(struct vhost_net *net) ...@@ -246,9 +359,10 @@ static void handle_tx(struct vhost_net *net)
/* If more outstanding DMAs, queue the work. /* If more outstanding DMAs, queue the work.
* Handle upend_idx wrap around * Handle upend_idx wrap around
*/ */
num_pends = likely(vq->upend_idx >= vq->done_idx) ? num_pends = likely(nvq->upend_idx >= nvq->done_idx) ?
(vq->upend_idx - vq->done_idx) : (nvq->upend_idx - nvq->done_idx) :
(vq->upend_idx + UIO_MAXIOV - vq->done_idx); (nvq->upend_idx + UIO_MAXIOV -
nvq->done_idx);
if (unlikely(num_pends > VHOST_MAX_PEND)) if (unlikely(num_pends > VHOST_MAX_PEND))
break; break;
if (unlikely(vhost_enable_notify(&net->dev, vq))) { if (unlikely(vhost_enable_notify(&net->dev, vq))) {
...@@ -263,45 +377,45 @@ static void handle_tx(struct vhost_net *net) ...@@ -263,45 +377,45 @@ static void handle_tx(struct vhost_net *net)
break; break;
} }
/* Skip header. TODO: support TSO. */ /* Skip header. TODO: support TSO. */
s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out); s = move_iovec_hdr(vq->iov, nvq->hdr, hdr_size, out);
msg.msg_iovlen = out; msg.msg_iovlen = out;
len = iov_length(vq->iov, out); len = iov_length(vq->iov, out);
/* Sanity check */ /* Sanity check */
if (!len) { if (!len) {
vq_err(vq, "Unexpected header len for TX: " vq_err(vq, "Unexpected header len for TX: "
"%zd expected %zd\n", "%zd expected %zd\n",
iov_length(vq->hdr, s), hdr_size); iov_length(nvq->hdr, s), hdr_size);
break; break;
} }
zcopy_used = zcopy && (len >= VHOST_GOODCOPY_LEN || zcopy_used = zcopy && (len >= VHOST_GOODCOPY_LEN ||
vq->upend_idx != vq->done_idx); nvq->upend_idx != nvq->done_idx);
/* use msg_control to pass vhost zerocopy ubuf info to skb */ /* use msg_control to pass vhost zerocopy ubuf info to skb */
if (zcopy_used) { if (zcopy_used) {
vq->heads[vq->upend_idx].id = head; vq->heads[nvq->upend_idx].id = head;
if (!vhost_net_tx_select_zcopy(net) || if (!vhost_net_tx_select_zcopy(net) ||
len < VHOST_GOODCOPY_LEN) { len < VHOST_GOODCOPY_LEN) {
/* copy don't need to wait for DMA done */ /* copy don't need to wait for DMA done */
vq->heads[vq->upend_idx].len = vq->heads[nvq->upend_idx].len =
VHOST_DMA_DONE_LEN; VHOST_DMA_DONE_LEN;
msg.msg_control = NULL; msg.msg_control = NULL;
msg.msg_controllen = 0; msg.msg_controllen = 0;
ubufs = NULL; ubufs = NULL;
} else { } else {
struct ubuf_info *ubuf; struct ubuf_info *ubuf;
ubuf = vq->ubuf_info + vq->upend_idx; ubuf = nvq->ubuf_info + nvq->upend_idx;
vq->heads[vq->upend_idx].len = vq->heads[nvq->upend_idx].len =
VHOST_DMA_IN_PROGRESS; VHOST_DMA_IN_PROGRESS;
ubuf->callback = vhost_zerocopy_callback; ubuf->callback = vhost_zerocopy_callback;
ubuf->ctx = vq->ubufs; ubuf->ctx = nvq->ubufs;
ubuf->desc = vq->upend_idx; ubuf->desc = nvq->upend_idx;
msg.msg_control = ubuf; msg.msg_control = ubuf;
msg.msg_controllen = sizeof(ubuf); msg.msg_controllen = sizeof(ubuf);
ubufs = vq->ubufs; ubufs = nvq->ubufs;
kref_get(&ubufs->kref); kref_get(&ubufs->kref);
} }
vq->upend_idx = (vq->upend_idx + 1) % UIO_MAXIOV; nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
} }
/* TODO: Check specific error and bomb out unless ENOBUFS? */ /* TODO: Check specific error and bomb out unless ENOBUFS? */
err = sock->ops->sendmsg(NULL, sock, &msg, len); err = sock->ops->sendmsg(NULL, sock, &msg, len);
...@@ -309,8 +423,8 @@ static void handle_tx(struct vhost_net *net) ...@@ -309,8 +423,8 @@ static void handle_tx(struct vhost_net *net)
if (zcopy_used) { if (zcopy_used) {
if (ubufs) if (ubufs)
vhost_ubuf_put(ubufs); vhost_ubuf_put(ubufs);
vq->upend_idx = ((unsigned)vq->upend_idx - 1) % nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
UIO_MAXIOV; % UIO_MAXIOV;
} }
vhost_discard_vq_desc(vq, 1); vhost_discard_vq_desc(vq, 1);
break; break;
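The throttle near the top of the transmit loop above counts outstanding zerocopy buffers as the distance from done_idx to upend_idx in a ring of UIO_MAXIOV slots. The wraparound arithmetic in isolation (hypothetical helper, same expression as num_pends above):

/* Outstanding zerocopy buffers in a ring of `size` slots, given the
 * completion cursor (done_idx) and submission cursor (upend_idx). */
static inline int pending_count(int done_idx, int upend_idx, int size)
{
	return upend_idx >= done_idx ? upend_idx - done_idx
				     : upend_idx + size - done_idx;
}

/* e.g. pending_count(3, 7, 1024) == 4, and after wrapping,
 * pending_count(1020, 2, 1024) == 6. */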
...@@ -417,7 +531,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, ...@@ -417,7 +531,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
* read-size critical section for our kind of RCU. */ * read-size critical section for our kind of RCU. */
static void handle_rx(struct vhost_net *net) static void handle_rx(struct vhost_net *net)
{ {
struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
struct vhost_virtqueue *vq = &nvq->vq;
unsigned uninitialized_var(in), log; unsigned uninitialized_var(in), log;
struct vhost_log *vq_log; struct vhost_log *vq_log;
struct msghdr msg = { struct msghdr msg = {
...@@ -445,8 +560,8 @@ static void handle_rx(struct vhost_net *net) ...@@ -445,8 +560,8 @@ static void handle_rx(struct vhost_net *net)
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
vhost_disable_notify(&net->dev, vq); vhost_disable_notify(&net->dev, vq);
vhost_hlen = vq->vhost_hlen; vhost_hlen = nvq->vhost_hlen;
sock_hlen = vq->sock_hlen; sock_hlen = nvq->sock_hlen;
vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
vq->log : NULL; vq->log : NULL;
...@@ -476,11 +591,11 @@ static void handle_rx(struct vhost_net *net) ...@@ -476,11 +591,11 @@ static void handle_rx(struct vhost_net *net)
/* We don't need to be notified again. */ /* We don't need to be notified again. */
if (unlikely((vhost_hlen))) if (unlikely((vhost_hlen)))
/* Skip header. TODO: support TSO. */ /* Skip header. TODO: support TSO. */
move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in); move_iovec_hdr(vq->iov, nvq->hdr, vhost_hlen, in);
else else
/* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF: /* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF:
* needed because recvmsg can modify msg_iov. */ * needed because recvmsg can modify msg_iov. */
copy_iovec_hdr(vq->iov, vq->hdr, sock_hlen, in); copy_iovec_hdr(vq->iov, nvq->hdr, sock_hlen, in);
msg.msg_iovlen = in; msg.msg_iovlen = in;
err = sock->ops->recvmsg(NULL, sock, &msg, err = sock->ops->recvmsg(NULL, sock, &msg,
sock_len, MSG_DONTWAIT | MSG_TRUNC); sock_len, MSG_DONTWAIT | MSG_TRUNC);
...@@ -494,7 +609,7 @@ static void handle_rx(struct vhost_net *net) ...@@ -494,7 +609,7 @@ static void handle_rx(struct vhost_net *net)
continue; continue;
} }
if (unlikely(vhost_hlen) && if (unlikely(vhost_hlen) &&
memcpy_toiovecend(vq->hdr, (unsigned char *)&hdr, 0, memcpy_toiovecend(nvq->hdr, (unsigned char *)&hdr, 0,
vhost_hlen)) { vhost_hlen)) {
vq_err(vq, "Unable to write vnet_hdr at addr %p\n", vq_err(vq, "Unable to write vnet_hdr at addr %p\n",
vq->iov->iov_base); vq->iov->iov_base);
...@@ -502,7 +617,7 @@ static void handle_rx(struct vhost_net *net) ...@@ -502,7 +617,7 @@ static void handle_rx(struct vhost_net *net)
} }
/* TODO: Should check and handle checksum. */ /* TODO: Should check and handle checksum. */
if (likely(mergeable) && if (likely(mergeable) &&
memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount, memcpy_toiovecend(nvq->hdr, (unsigned char *)&headcount,
offsetof(typeof(hdr), num_buffers), offsetof(typeof(hdr), num_buffers),
sizeof hdr.num_buffers)) { sizeof hdr.num_buffers)) {
vq_err(vq, "Failed num_buffers write"); vq_err(vq, "Failed num_buffers write");
@@ -559,17 +674,34 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 {
 	struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
 	struct vhost_dev *dev;
-	int r;
+	struct vhost_virtqueue **vqs;
+	int r, i;
 
 	if (!n)
 		return -ENOMEM;
+	vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL);
+	if (!vqs) {
+		kfree(n);
+		return -ENOMEM;
+	}
 
 	dev = &n->dev;
-	n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick;
-	n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick;
-	r = vhost_dev_init(dev, n->vqs, VHOST_NET_VQ_MAX);
+	vqs[VHOST_NET_VQ_TX] = &n->vqs[VHOST_NET_VQ_TX].vq;
+	vqs[VHOST_NET_VQ_RX] = &n->vqs[VHOST_NET_VQ_RX].vq;
+	n->vqs[VHOST_NET_VQ_TX].vq.handle_kick = handle_tx_kick;
+	n->vqs[VHOST_NET_VQ_RX].vq.handle_kick = handle_rx_kick;
+	for (i = 0; i < VHOST_NET_VQ_MAX; i++) {
+		n->vqs[i].ubufs = NULL;
+		n->vqs[i].ubuf_info = NULL;
+		n->vqs[i].upend_idx = 0;
+		n->vqs[i].done_idx = 0;
+		n->vqs[i].vhost_hlen = 0;
+		n->vqs[i].sock_hlen = 0;
+	}
+	r = vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
 	if (r < 0) {
 		kfree(n);
+		kfree(vqs);
 		return r;
 	}
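This open path is the heart of the "Allow device specific fields per vq" change: the generic struct vhost_virtqueue is now embedded in a net-private struct vhost_net_virtqueue, and vhost_dev_init() receives an array of pointers to the embedded cores. A hedged sketch of how another backend would follow the same pattern (the my_* names are hypothetical); note that the pointer array is stored in dev->vqs, so it has to stay allocated until release:

#include <linux/slab.h>
#include "vhost.h"

struct my_virtqueue {
	struct vhost_virtqueue vq;	/* generic vhost state */
	int my_counter;			/* device-specific per-vq field */
};

struct my_dev {
	struct vhost_dev dev;
	struct my_virtqueue vqs[2];
};

static int my_dev_setup(struct my_dev *d)
{
	struct vhost_virtqueue **vqs;
	int i, r;

	/* dev->vqs keeps this pointer array, so it must outlive the device;
	 * it is freed again on release (cf. kfree(n->dev.vqs) above). */
	vqs = kmalloc(2 * sizeof(*vqs), GFP_KERNEL);
	if (!vqs)
		return -ENOMEM;
	for (i = 0; i < 2; i++) {
		vqs[i] = &d->vqs[i].vq;
		d->vqs[i].my_counter = 0;
	}
	r = vhost_dev_init(&d->dev, vqs, 2);
	if (r < 0)
		kfree(vqs);
	return r;
}

/* Later, container_of(vq, struct my_virtqueue, vq) recovers the private
 * part, exactly as vhost_net_disable_vq() does above. */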
...@@ -584,7 +716,9 @@ static int vhost_net_open(struct inode *inode, struct file *f) ...@@ -584,7 +716,9 @@ static int vhost_net_open(struct inode *inode, struct file *f)
static void vhost_net_disable_vq(struct vhost_net *n, static void vhost_net_disable_vq(struct vhost_net *n,
struct vhost_virtqueue *vq) struct vhost_virtqueue *vq)
{ {
struct vhost_poll *poll = n->poll + (vq - n->vqs); struct vhost_net_virtqueue *nvq =
container_of(vq, struct vhost_net_virtqueue, vq);
struct vhost_poll *poll = n->poll + (nvq - n->vqs);
if (!vq->private_data) if (!vq->private_data)
return; return;
vhost_poll_stop(poll); vhost_poll_stop(poll);
...@@ -593,7 +727,9 @@ static void vhost_net_disable_vq(struct vhost_net *n, ...@@ -593,7 +727,9 @@ static void vhost_net_disable_vq(struct vhost_net *n,
static int vhost_net_enable_vq(struct vhost_net *n, static int vhost_net_enable_vq(struct vhost_net *n,
struct vhost_virtqueue *vq) struct vhost_virtqueue *vq)
{ {
struct vhost_poll *poll = n->poll + (vq - n->vqs); struct vhost_net_virtqueue *nvq =
container_of(vq, struct vhost_net_virtqueue, vq);
struct vhost_poll *poll = n->poll + (nvq - n->vqs);
struct socket *sock; struct socket *sock;
sock = rcu_dereference_protected(vq->private_data, sock = rcu_dereference_protected(vq->private_data,
@@ -621,30 +757,30 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n,
 static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock,
 			   struct socket **rx_sock)
 {
-	*tx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_TX);
-	*rx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_RX);
+	*tx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_TX].vq);
+	*rx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_RX].vq);
 }
 
 static void vhost_net_flush_vq(struct vhost_net *n, int index)
 {
 	vhost_poll_flush(n->poll + index);
-	vhost_poll_flush(&n->dev.vqs[index].poll);
+	vhost_poll_flush(&n->vqs[index].vq.poll);
 }
 
 static void vhost_net_flush(struct vhost_net *n)
 {
 	vhost_net_flush_vq(n, VHOST_NET_VQ_TX);
 	vhost_net_flush_vq(n, VHOST_NET_VQ_RX);
-	if (n->dev.vqs[VHOST_NET_VQ_TX].ubufs) {
-		mutex_lock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex);
+	if (n->vqs[VHOST_NET_VQ_TX].ubufs) {
+		mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
 		n->tx_flush = true;
-		mutex_unlock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex);
+		mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
 		/* Wait for all lower device DMAs done. */
-		vhost_ubuf_put_and_wait(n->dev.vqs[VHOST_NET_VQ_TX].ubufs);
-		mutex_lock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex);
+		vhost_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].ubufs);
+		mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
 		n->tx_flush = false;
-		kref_init(&n->dev.vqs[VHOST_NET_VQ_TX].ubufs->kref);
-		mutex_unlock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex);
+		kref_init(&n->vqs[VHOST_NET_VQ_TX].ubufs->kref);
+		mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
 	}
 }
...@@ -658,6 +794,7 @@ static int vhost_net_release(struct inode *inode, struct file *f) ...@@ -658,6 +794,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
vhost_net_flush(n); vhost_net_flush(n);
vhost_dev_stop(&n->dev); vhost_dev_stop(&n->dev);
vhost_dev_cleanup(&n->dev, false); vhost_dev_cleanup(&n->dev, false);
vhost_net_vq_reset(n);
if (tx_sock) if (tx_sock)
fput(tx_sock->file); fput(tx_sock->file);
if (rx_sock) if (rx_sock)
...@@ -665,6 +802,7 @@ static int vhost_net_release(struct inode *inode, struct file *f) ...@@ -665,6 +802,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
/* We do an extra flush before freeing memory, /* We do an extra flush before freeing memory,
* since jobs can re-queue themselves. */ * since jobs can re-queue themselves. */
vhost_net_flush(n); vhost_net_flush(n);
kfree(n->dev.vqs);
kfree(n); kfree(n);
return 0; return 0;
} }
...@@ -738,6 +876,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) ...@@ -738,6 +876,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
{ {
struct socket *sock, *oldsock; struct socket *sock, *oldsock;
struct vhost_virtqueue *vq; struct vhost_virtqueue *vq;
struct vhost_net_virtqueue *nvq;
struct vhost_ubuf_ref *ubufs, *oldubufs = NULL; struct vhost_ubuf_ref *ubufs, *oldubufs = NULL;
int r; int r;
...@@ -750,7 +889,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) ...@@ -750,7 +889,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
r = -ENOBUFS; r = -ENOBUFS;
goto err; goto err;
} }
vq = n->vqs + index; vq = &n->vqs[index].vq;
nvq = &n->vqs[index];
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
/* Verify that ring has been setup correctly. */ /* Verify that ring has been setup correctly. */
...@@ -783,8 +923,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) ...@@ -783,8 +923,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
if (r) if (r)
goto err_used; goto err_used;
oldubufs = vq->ubufs; oldubufs = nvq->ubufs;
vq->ubufs = ubufs; nvq->ubufs = ubufs;
n->tx_packets = 0; n->tx_packets = 0;
n->tx_zcopy_err = 0; n->tx_zcopy_err = 0;
@@ -827,14 +967,21 @@ static long vhost_net_reset_owner(struct vhost_net *n)
 	struct socket *tx_sock = NULL;
 	struct socket *rx_sock = NULL;
 	long err;
+	struct vhost_memory *memory;
 
 	mutex_lock(&n->dev.mutex);
 	err = vhost_dev_check_owner(&n->dev);
 	if (err)
 		goto done;
+	memory = vhost_dev_reset_owner_prepare();
+	if (!memory) {
+		err = -ENOMEM;
+		goto done;
+	}
 	vhost_net_stop(n, &tx_sock, &rx_sock);
 	vhost_net_flush(n);
-	err = vhost_dev_reset_owner(&n->dev);
+	vhost_dev_reset_owner(&n->dev, memory);
+	vhost_net_vq_reset(n);
 done:
 	mutex_unlock(&n->dev.mutex);
 	if (tx_sock)
...@@ -870,10 +1017,10 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) ...@@ -870,10 +1017,10 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
n->dev.acked_features = features; n->dev.acked_features = features;
smp_wmb(); smp_wmb();
for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
mutex_lock(&n->vqs[i].mutex); mutex_lock(&n->vqs[i].vq.mutex);
n->vqs[i].vhost_hlen = vhost_hlen; n->vqs[i].vhost_hlen = vhost_hlen;
n->vqs[i].sock_hlen = sock_hlen; n->vqs[i].sock_hlen = sock_hlen;
mutex_unlock(&n->vqs[i].mutex); mutex_unlock(&n->vqs[i].vq.mutex);
} }
vhost_net_flush(n); vhost_net_flush(n);
mutex_unlock(&n->dev.mutex); mutex_unlock(&n->dev.mutex);
@@ -910,11 +1057,17 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
 		return vhost_net_reset_owner(n);
 	default:
 		mutex_lock(&n->dev.mutex);
+		if (ioctl == VHOST_SET_OWNER) {
+			r = vhost_net_set_ubuf_info(n);
+			if (r)
+				goto out;
+		}
 		r = vhost_dev_ioctl(&n->dev, ioctl, argp);
 		if (r == -ENOIOCTLCMD)
 			r = vhost_vring_ioctl(&n->dev, ioctl, argp);
 		else
 			vhost_net_flush(n);
+out:
 		mutex_unlock(&n->dev.mutex);
 		return r;
 	}
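The new branch above sizes the zerocopy ubuf_info arrays when userspace issues VHOST_SET_OWNER, rather than unconditionally at open time. For context, a hedged userspace-side sketch of where that ioctl sits in a typical vhost-net bring-up (error handling is minimal; the later memory-table, vring and backend steps are only indicated in a comment):

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

static int vhost_net_attach(void)
{
	int vhost_fd = open("/dev/vhost-net", O_RDWR);

	if (vhost_fd < 0)
		return -1;
	/* Bind the device to this process and start its worker; with this
	 * patch the per-queue zerocopy bookkeeping is allocated here too. */
	if (ioctl(vhost_fd, VHOST_SET_OWNER, NULL) < 0) {
		close(vhost_fd);
		return -1;
	}
	/* A real client (e.g. QEMU) would continue with VHOST_SET_MEM_TABLE,
	 * the VHOST_SET_VRING_* calls and VHOST_NET_SET_BACKEND. */
	return vhost_fd;
}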
......
...@@ -45,14 +45,116 @@ ...@@ -45,14 +45,116 @@
#include <target/target_core_configfs.h> #include <target/target_core_configfs.h>
#include <target/configfs_macros.h> #include <target/configfs_macros.h>
#include <linux/vhost.h> #include <linux/vhost.h>
#include <linux/virtio_net.h> /* TODO vhost.h currently depends on this */
#include <linux/virtio_scsi.h> #include <linux/virtio_scsi.h>
#include <linux/llist.h> #include <linux/llist.h>
#include <linux/bitmap.h> #include <linux/bitmap.h>
#include "vhost.c" #include "vhost.c"
#include "vhost.h" #include "vhost.h"
#include "tcm_vhost.h"
#define TCM_VHOST_VERSION "v0.1"
#define TCM_VHOST_NAMELEN 256
#define TCM_VHOST_MAX_CDB_SIZE 32
struct vhost_scsi_inflight {
/* Wait for the flush operation to finish */
struct completion comp;
/* Refcount for the inflight reqs */
struct kref kref;
};
struct tcm_vhost_cmd {
/* Descriptor from vhost_get_vq_desc() for virt_queue segment */
int tvc_vq_desc;
/* virtio-scsi initiator task attribute */
int tvc_task_attr;
/* virtio-scsi initiator data direction */
enum dma_data_direction tvc_data_direction;
/* Expected data transfer length from virtio-scsi header */
u32 tvc_exp_data_len;
/* The Tag from include/linux/virtio_scsi.h:struct virtio_scsi_cmd_req */
u64 tvc_tag;
/* The number of scatterlists associated with this cmd */
u32 tvc_sgl_count;
/* Saved unpacked SCSI LUN for tcm_vhost_submission_work() */
u32 tvc_lun;
/* Pointer to the SGL formatted memory from virtio-scsi */
struct scatterlist *tvc_sgl;
/* Pointer to response */
struct virtio_scsi_cmd_resp __user *tvc_resp;
/* Pointer to vhost_scsi for our device */
struct vhost_scsi *tvc_vhost;
/* Pointer to vhost_virtqueue for the cmd */
struct vhost_virtqueue *tvc_vq;
/* Pointer to vhost nexus memory */
struct tcm_vhost_nexus *tvc_nexus;
/* The TCM I/O descriptor that is accessed via container_of() */
struct se_cmd tvc_se_cmd;
/* work item used for cmwq dispatch to tcm_vhost_submission_work() */
struct work_struct work;
/* Copy of the incoming SCSI command descriptor block (CDB) */
unsigned char tvc_cdb[TCM_VHOST_MAX_CDB_SIZE];
/* Sense buffer that will be mapped into outgoing status */
unsigned char tvc_sense_buf[TRANSPORT_SENSE_BUFFER];
/* Completed commands list, serviced from vhost worker thread */
struct llist_node tvc_completion_list;
/* Used to track inflight cmd */
struct vhost_scsi_inflight *inflight;
};
struct tcm_vhost_nexus {
/* Pointer to TCM session for I_T Nexus */
struct se_session *tvn_se_sess;
};
struct tcm_vhost_nacl {
/* Binary World Wide unique Port Name for Vhost Initiator port */
u64 iport_wwpn;
/* ASCII formatted WWPN for Sas Initiator port */
char iport_name[TCM_VHOST_NAMELEN];
/* Returned by tcm_vhost_make_nodeacl() */
struct se_node_acl se_node_acl;
};
struct vhost_scsi;
struct tcm_vhost_tpg {
/* Vhost port target portal group tag for TCM */
u16 tport_tpgt;
/* Used to track number of TPG Port/Lun Links wrt explicit I_T Nexus shutdown */
int tv_tpg_port_count;
/* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */
int tv_tpg_vhost_count;
/* list for tcm_vhost_list */
struct list_head tv_tpg_list;
/* Used to protect access for tpg_nexus */
struct mutex tv_tpg_mutex;
/* Pointer to the TCM VHost I_T Nexus for this TPG endpoint */
struct tcm_vhost_nexus *tpg_nexus;
/* Pointer back to tcm_vhost_tport */
struct tcm_vhost_tport *tport;
/* Returned by tcm_vhost_make_tpg() */
struct se_portal_group se_tpg;
/* Pointer back to vhost_scsi, protected by tv_tpg_mutex */
struct vhost_scsi *vhost_scsi;
};
struct tcm_vhost_tport {
/* SCSI protocol the tport is providing */
u8 tport_proto_id;
/* Binary World Wide unique Port Name for Vhost Target port */
u64 tport_wwpn;
/* ASCII formatted WWPN for Vhost Target port */
char tport_name[TCM_VHOST_NAMELEN];
/* Returned by tcm_vhost_make_tport() */
struct se_wwn tport_wwn;
};
struct tcm_vhost_evt {
/* event to be sent to guest */
struct virtio_scsi_event event;
/* event list, serviced from vhost worker thread */
struct llist_node list;
};
enum { enum {
VHOST_SCSI_VQ_CTL = 0, VHOST_SCSI_VQ_CTL = 0,
...@@ -74,13 +176,28 @@ enum { ...@@ -74,13 +176,28 @@ enum {
#define VHOST_SCSI_MAX_VQ 128 #define VHOST_SCSI_MAX_VQ 128
#define VHOST_SCSI_MAX_EVENT 128 #define VHOST_SCSI_MAX_EVENT 128
struct vhost_scsi_virtqueue {
struct vhost_virtqueue vq;
/*
* Reference counting for inflight reqs, used for flush operation. At
* each time, one reference tracks new commands submitted, while we
* wait for another one to reach 0.
*/
struct vhost_scsi_inflight inflights[2];
/*
* Indicate current inflight in use, protected by vq->mutex.
* Writers must also take dev mutex and flush under it.
*/
int inflight_idx;
};
struct vhost_scsi { struct vhost_scsi {
/* Protected by vhost_scsi->dev.mutex */ /* Protected by vhost_scsi->dev.mutex */
struct tcm_vhost_tpg **vs_tpg; struct tcm_vhost_tpg **vs_tpg;
char vs_vhost_wwpn[TRANSPORT_IQN_LEN]; char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
struct vhost_dev dev; struct vhost_dev dev;
struct vhost_virtqueue vqs[VHOST_SCSI_MAX_VQ]; struct vhost_scsi_virtqueue vqs[VHOST_SCSI_MAX_VQ];
struct vhost_work vs_completion_work; /* cmd completion work item */ struct vhost_work vs_completion_work; /* cmd completion work item */
struct llist_head vs_completion_list; /* cmd completion queue */ struct llist_head vs_completion_list; /* cmd completion queue */
...@@ -107,6 +224,59 @@ static int iov_num_pages(struct iovec *iov) ...@@ -107,6 +224,59 @@ static int iov_num_pages(struct iovec *iov)
((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT; ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT;
} }
void tcm_vhost_done_inflight(struct kref *kref)
{
struct vhost_scsi_inflight *inflight;
inflight = container_of(kref, struct vhost_scsi_inflight, kref);
complete(&inflight->comp);
}
static void tcm_vhost_init_inflight(struct vhost_scsi *vs,
struct vhost_scsi_inflight *old_inflight[])
{
struct vhost_scsi_inflight *new_inflight;
struct vhost_virtqueue *vq;
int idx, i;
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex);
/* store old inflight */
idx = vs->vqs[i].inflight_idx;
if (old_inflight)
old_inflight[i] = &vs->vqs[i].inflights[idx];
/* setup new inflight */
vs->vqs[i].inflight_idx = idx ^ 1;
new_inflight = &vs->vqs[i].inflights[idx ^ 1];
kref_init(&new_inflight->kref);
init_completion(&new_inflight->comp);
mutex_unlock(&vq->mutex);
}
}
static struct vhost_scsi_inflight *
tcm_vhost_get_inflight(struct vhost_virtqueue *vq)
{
struct vhost_scsi_inflight *inflight;
struct vhost_scsi_virtqueue *svq;
svq = container_of(vq, struct vhost_scsi_virtqueue, vq);
inflight = &svq->inflights[svq->inflight_idx];
kref_get(&inflight->kref);
return inflight;
}
static void tcm_vhost_put_inflight(struct vhost_scsi_inflight *inflight)
{
kref_put(&inflight->kref, tcm_vhost_done_inflight);
}
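Each SCSI command pins the virtqueue's *current* inflight counter for its whole lifetime, so a flush can wait for exactly the commands issued before it. A hedged sketch of the intended call pattern using the helpers above (the example_* names are hypothetical; the real call sites are vhost_scsi_allocate_cmd() and vhost_scsi_free_cmd() in this patch):

#include <linux/slab.h>

static struct tcm_vhost_cmd *example_submit(struct vhost_virtqueue *vq)
{
	struct tcm_vhost_cmd *cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC);

	if (!cmd)
		return NULL;
	/* Pin the vq's current inflight counter for as long as the command
	 * lives; a concurrent flush waits for exactly these references. */
	cmd->inflight = tcm_vhost_get_inflight(vq);
	return cmd;
}

static void example_complete(struct tcm_vhost_cmd *cmd)
{
	/* Dropping the reference is what eventually lets a pending
	 * vhost_scsi_flush() see the old counter reach zero. */
	tcm_vhost_put_inflight(cmd->inflight);
	kfree(cmd);
}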
static int tcm_vhost_check_true(struct se_portal_group *se_tpg) static int tcm_vhost_check_true(struct se_portal_group *se_tpg)
{ {
return 1; return 1;
...@@ -366,7 +536,7 @@ static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt *evt) ...@@ -366,7 +536,7 @@ static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt *evt)
static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs, static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs,
u32 event, u32 reason) u32 event, u32 reason)
{ {
struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
struct tcm_vhost_evt *evt; struct tcm_vhost_evt *evt;
if (vs->vs_events_nr > VHOST_SCSI_MAX_EVENT) { if (vs->vs_events_nr > VHOST_SCSI_MAX_EVENT) {
...@@ -403,13 +573,15 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd) ...@@ -403,13 +573,15 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
kfree(tv_cmd->tvc_sgl); kfree(tv_cmd->tvc_sgl);
} }
tcm_vhost_put_inflight(tv_cmd->inflight);
kfree(tv_cmd); kfree(tv_cmd);
} }
static void tcm_vhost_do_evt_work(struct vhost_scsi *vs, static void tcm_vhost_do_evt_work(struct vhost_scsi *vs,
struct tcm_vhost_evt *evt) struct tcm_vhost_evt *evt)
{ {
struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
struct virtio_scsi_event *event = &evt->event; struct virtio_scsi_event *event = &evt->event;
struct virtio_scsi_event __user *eventp; struct virtio_scsi_event __user *eventp;
unsigned out, in; unsigned out, in;
...@@ -460,7 +632,7 @@ static void tcm_vhost_evt_work(struct vhost_work *work) ...@@ -460,7 +632,7 @@ static void tcm_vhost_evt_work(struct vhost_work *work)
{ {
struct vhost_scsi *vs = container_of(work, struct vhost_scsi, struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
vs_event_work); vs_event_work);
struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
struct tcm_vhost_evt *evt; struct tcm_vhost_evt *evt;
struct llist_node *llnode; struct llist_node *llnode;
...@@ -511,8 +683,10 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) ...@@ -511,8 +683,10 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
v_rsp.sense_len); v_rsp.sense_len);
ret = copy_to_user(tv_cmd->tvc_resp, &v_rsp, sizeof(v_rsp)); ret = copy_to_user(tv_cmd->tvc_resp, &v_rsp, sizeof(v_rsp));
if (likely(ret == 0)) { if (likely(ret == 0)) {
struct vhost_scsi_virtqueue *q;
vhost_add_used(tv_cmd->tvc_vq, tv_cmd->tvc_vq_desc, 0); vhost_add_used(tv_cmd->tvc_vq, tv_cmd->tvc_vq_desc, 0);
vq = tv_cmd->tvc_vq - vs->vqs; q = container_of(tv_cmd->tvc_vq, struct vhost_scsi_virtqueue, vq);
vq = q - vs->vqs;
__set_bit(vq, signal); __set_bit(vq, signal);
} else } else
pr_err("Faulted on virtio_scsi_cmd_resp\n"); pr_err("Faulted on virtio_scsi_cmd_resp\n");
...@@ -523,10 +697,11 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) ...@@ -523,10 +697,11 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
vq = -1; vq = -1;
while ((vq = find_next_bit(signal, VHOST_SCSI_MAX_VQ, vq + 1)) while ((vq = find_next_bit(signal, VHOST_SCSI_MAX_VQ, vq + 1))
< VHOST_SCSI_MAX_VQ) < VHOST_SCSI_MAX_VQ)
vhost_signal(&vs->dev, &vs->vqs[vq]); vhost_signal(&vs->dev, &vs->vqs[vq].vq);
} }
static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd( static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd(
struct vhost_virtqueue *vq,
struct tcm_vhost_tpg *tv_tpg, struct tcm_vhost_tpg *tv_tpg,
struct virtio_scsi_cmd_req *v_req, struct virtio_scsi_cmd_req *v_req,
u32 exp_data_len, u32 exp_data_len,
...@@ -551,6 +726,7 @@ static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd( ...@@ -551,6 +726,7 @@ static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd(
tv_cmd->tvc_exp_data_len = exp_data_len; tv_cmd->tvc_exp_data_len = exp_data_len;
tv_cmd->tvc_data_direction = data_direction; tv_cmd->tvc_data_direction = data_direction;
tv_cmd->tvc_nexus = tv_nexus; tv_cmd->tvc_nexus = tv_nexus;
tv_cmd->inflight = tcm_vhost_get_inflight(vq);
return tv_cmd; return tv_cmd;
} }
...@@ -806,7 +982,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, ...@@ -806,7 +982,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
for (i = 0; i < data_num; i++) for (i = 0; i < data_num; i++)
exp_data_len += vq->iov[data_first + i].iov_len; exp_data_len += vq->iov[data_first + i].iov_len;
tv_cmd = vhost_scsi_allocate_cmd(tv_tpg, &v_req, tv_cmd = vhost_scsi_allocate_cmd(vq, tv_tpg, &v_req,
exp_data_len, data_direction); exp_data_len, data_direction);
if (IS_ERR(tv_cmd)) { if (IS_ERR(tv_cmd)) {
vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n", vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n",
...@@ -938,17 +1114,35 @@ static void vhost_scsi_handle_kick(struct vhost_work *work) ...@@ -938,17 +1114,35 @@ static void vhost_scsi_handle_kick(struct vhost_work *work)
 static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index)
 {
-	vhost_poll_flush(&vs->dev.vqs[index].poll);
+	vhost_poll_flush(&vs->vqs[index].vq.poll);
 }
 
+/* Callers must hold dev mutex */
 static void vhost_scsi_flush(struct vhost_scsi *vs)
 {
+	struct vhost_scsi_inflight *old_inflight[VHOST_SCSI_MAX_VQ];
 	int i;
 
+	/* Init new inflight and remember the old inflight */
+	tcm_vhost_init_inflight(vs, old_inflight);
+
+	/*
+	 * The inflight->kref was initialized to 1. We decrement it here to
+	 * indicate the start of the flush operation so that it will reach 0
+	 * when all the reqs are finished.
+	 */
+	for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
+		kref_put(&old_inflight[i]->kref, tcm_vhost_done_inflight);
+
+	/* Flush both the vhost poll and vhost work */
 	for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
 		vhost_scsi_flush_vq(vs, i);
 	vhost_work_flush(&vs->dev, &vs->vs_completion_work);
 	vhost_work_flush(&vs->dev, &vs->vs_event_work);
+
+	/* Wait for all reqs issued before the flush to be finished */
+	for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
+		wait_for_completion(&old_inflight[i]->comp);
 }
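The flush above relies on two counters per virtqueue: one accumulates references for requests issued since the last flush, the other is being drained. A self-contained model of that double-buffered scheme using a kref plus completion (the inflight_* names are illustrative, not the patch's API; in the real code the index flip happens under vq->mutex inside tcm_vhost_init_inflight()):

#include <linux/kref.h>
#include <linux/completion.h>

struct inflight_slot {
	struct kref kref;		/* 1 (flush's share) + one per request */
	struct completion comp;		/* completed when kref hits zero */
};

struct inflight_pair {
	struct inflight_slot slot[2];
	int idx;			/* slot charged by new requests */
};

static void inflight_release(struct kref *kref)
{
	struct inflight_slot *s = container_of(kref, struct inflight_slot, kref);

	complete(&s->comp);
}

/* Request side: charge the currently active slot. */
static struct inflight_slot *inflight_get(struct inflight_pair *p)
{
	kref_get(&p->slot[p->idx].kref);
	return &p->slot[p->idx];
}

/* Flush side: flip to a fresh slot, then drop the old slot's initial
 * reference and wait until every request that charged it has put its own. */
static void inflight_flush(struct inflight_pair *p)
{
	struct inflight_slot *old = &p->slot[p->idx];

	p->idx ^= 1;
	kref_init(&p->slot[p->idx].kref);
	init_completion(&p->slot[p->idx].comp);

	kref_put(&old->kref, inflight_release);
	wait_for_completion(&old->comp);
}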
/* /*
...@@ -975,7 +1169,7 @@ static int vhost_scsi_set_endpoint( ...@@ -975,7 +1169,7 @@ static int vhost_scsi_set_endpoint(
/* Verify that ring has been setup correctly. */ /* Verify that ring has been setup correctly. */
for (index = 0; index < vs->dev.nvqs; ++index) { for (index = 0; index < vs->dev.nvqs; ++index) {
/* Verify that ring has been setup correctly. */ /* Verify that ring has been setup correctly. */
if (!vhost_vq_access_ok(&vs->vqs[index])) { if (!vhost_vq_access_ok(&vs->vqs[index].vq)) {
ret = -EFAULT; ret = -EFAULT;
goto out; goto out;
} }
...@@ -1022,7 +1216,7 @@ static int vhost_scsi_set_endpoint( ...@@ -1022,7 +1216,7 @@ static int vhost_scsi_set_endpoint(
memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn, memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn,
sizeof(vs->vs_vhost_wwpn)); sizeof(vs->vs_vhost_wwpn));
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
vq = &vs->vqs[i]; vq = &vs->vqs[i].vq;
/* Flushing the vhost_work acts as synchronize_rcu */ /* Flushing the vhost_work acts as synchronize_rcu */
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
rcu_assign_pointer(vq->private_data, vs_tpg); rcu_assign_pointer(vq->private_data, vs_tpg);
...@@ -1063,7 +1257,7 @@ static int vhost_scsi_clear_endpoint( ...@@ -1063,7 +1257,7 @@ static int vhost_scsi_clear_endpoint(
mutex_lock(&vs->dev.mutex); mutex_lock(&vs->dev.mutex);
/* Verify that ring has been setup correctly. */ /* Verify that ring has been setup correctly. */
for (index = 0; index < vs->dev.nvqs; ++index) { for (index = 0; index < vs->dev.nvqs; ++index) {
if (!vhost_vq_access_ok(&vs->vqs[index])) { if (!vhost_vq_access_ok(&vs->vqs[index].vq)) {
ret = -EFAULT; ret = -EFAULT;
goto err_dev; goto err_dev;
} }
...@@ -1103,7 +1297,7 @@ static int vhost_scsi_clear_endpoint( ...@@ -1103,7 +1297,7 @@ static int vhost_scsi_clear_endpoint(
} }
if (match) { if (match) {
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
vq = &vs->vqs[i]; vq = &vs->vqs[i].vq;
/* Flushing the vhost_work acts as synchronize_rcu */ /* Flushing the vhost_work acts as synchronize_rcu */
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
rcu_assign_pointer(vq->private_data, NULL); rcu_assign_pointer(vq->private_data, NULL);
...@@ -1151,24 +1345,39 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features) ...@@ -1151,24 +1345,39 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
static int vhost_scsi_open(struct inode *inode, struct file *f) static int vhost_scsi_open(struct inode *inode, struct file *f)
{ {
struct vhost_scsi *s; struct vhost_scsi *s;
struct vhost_virtqueue **vqs;
int r, i; int r, i;
s = kzalloc(sizeof(*s), GFP_KERNEL); s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s) if (!s)
return -ENOMEM; return -ENOMEM;
vqs = kmalloc(VHOST_SCSI_MAX_VQ * sizeof(*vqs), GFP_KERNEL);
if (!vqs) {
kfree(s);
return -ENOMEM;
}
vhost_work_init(&s->vs_completion_work, vhost_scsi_complete_cmd_work); vhost_work_init(&s->vs_completion_work, vhost_scsi_complete_cmd_work);
vhost_work_init(&s->vs_event_work, tcm_vhost_evt_work); vhost_work_init(&s->vs_event_work, tcm_vhost_evt_work);
s->vs_events_nr = 0; s->vs_events_nr = 0;
s->vs_events_missed = false; s->vs_events_missed = false;
s->vqs[VHOST_SCSI_VQ_CTL].handle_kick = vhost_scsi_ctl_handle_kick; vqs[VHOST_SCSI_VQ_CTL] = &s->vqs[VHOST_SCSI_VQ_CTL].vq;
s->vqs[VHOST_SCSI_VQ_EVT].handle_kick = vhost_scsi_evt_handle_kick; vqs[VHOST_SCSI_VQ_EVT] = &s->vqs[VHOST_SCSI_VQ_EVT].vq;
for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) s->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick;
s->vqs[i].handle_kick = vhost_scsi_handle_kick; s->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick;
r = vhost_dev_init(&s->dev, s->vqs, VHOST_SCSI_MAX_VQ); for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) {
vqs[i] = &s->vqs[i].vq;
s->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
}
r = vhost_dev_init(&s->dev, vqs, VHOST_SCSI_MAX_VQ);
tcm_vhost_init_inflight(s, NULL);
if (r < 0) { if (r < 0) {
kfree(vqs);
kfree(s); kfree(s);
return r; return r;
} }
...@@ -1190,6 +1399,7 @@ static int vhost_scsi_release(struct inode *inode, struct file *f) ...@@ -1190,6 +1399,7 @@ static int vhost_scsi_release(struct inode *inode, struct file *f)
vhost_dev_cleanup(&s->dev, false); vhost_dev_cleanup(&s->dev, false);
/* Jobs can re-queue themselves in evt kick handler. Do extra flush. */ /* Jobs can re-queue themselves in evt kick handler. Do extra flush. */
vhost_scsi_flush(s); vhost_scsi_flush(s);
kfree(s->dev.vqs);
kfree(s); kfree(s);
return 0; return 0;
} }
...@@ -1205,7 +1415,7 @@ static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl, ...@@ -1205,7 +1415,7 @@ static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl,
u32 events_missed; u32 events_missed;
u64 features; u64 features;
int r, abi_version = VHOST_SCSI_ABI_VERSION; int r, abi_version = VHOST_SCSI_ABI_VERSION;
struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
switch (ioctl) { switch (ioctl) {
case VHOST_SCSI_SET_ENDPOINT: case VHOST_SCSI_SET_ENDPOINT:
...@@ -1333,7 +1543,7 @@ static void tcm_vhost_do_plug(struct tcm_vhost_tpg *tpg, ...@@ -1333,7 +1543,7 @@ static void tcm_vhost_do_plug(struct tcm_vhost_tpg *tpg,
else else
reason = VIRTIO_SCSI_EVT_RESET_REMOVED; reason = VIRTIO_SCSI_EVT_RESET_REMOVED;
vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
tcm_vhost_send_evt(vs, tpg, lun, tcm_vhost_send_evt(vs, tpg, lun,
VIRTIO_SCSI_T_TRANSPORT_RESET, reason); VIRTIO_SCSI_T_TRANSPORT_RESET, reason);
@@ -1926,7 +2136,8 @@ static void tcm_vhost_exit(void)
 	destroy_workqueue(tcm_vhost_workqueue);
 };
 
-MODULE_DESCRIPTION("TCM_VHOST series fabric driver");
+MODULE_DESCRIPTION("VHOST_SCSI series fabric driver");
+MODULE_ALIAS("tcm_vhost");
 MODULE_LICENSE("GPL");
 module_init(tcm_vhost_init);
 module_exit(tcm_vhost_exit);
#define TCM_VHOST_VERSION "v0.1"
#define TCM_VHOST_NAMELEN 256
#define TCM_VHOST_MAX_CDB_SIZE 32
struct tcm_vhost_cmd {
/* Descriptor from vhost_get_vq_desc() for virt_queue segment */
int tvc_vq_desc;
/* virtio-scsi initiator task attribute */
int tvc_task_attr;
/* virtio-scsi initiator data direction */
enum dma_data_direction tvc_data_direction;
/* Expected data transfer length from virtio-scsi header */
u32 tvc_exp_data_len;
/* The Tag from include/linux/virtio_scsi.h:struct virtio_scsi_cmd_req */
u64 tvc_tag;
/* The number of scatterlists associated with this cmd */
u32 tvc_sgl_count;
/* Saved unpacked SCSI LUN for tcm_vhost_submission_work() */
u32 tvc_lun;
/* Pointer to the SGL formatted memory from virtio-scsi */
struct scatterlist *tvc_sgl;
/* Pointer to response */
struct virtio_scsi_cmd_resp __user *tvc_resp;
/* Pointer to vhost_scsi for our device */
struct vhost_scsi *tvc_vhost;
/* Pointer to vhost_virtqueue for the cmd */
struct vhost_virtqueue *tvc_vq;
/* Pointer to vhost nexus memory */
struct tcm_vhost_nexus *tvc_nexus;
/* The TCM I/O descriptor that is accessed via container_of() */
struct se_cmd tvc_se_cmd;
/* work item used for cmwq dispatch to tcm_vhost_submission_work() */
struct work_struct work;
/* Copy of the incoming SCSI command descriptor block (CDB) */
unsigned char tvc_cdb[TCM_VHOST_MAX_CDB_SIZE];
/* Sense buffer that will be mapped into outgoing status */
unsigned char tvc_sense_buf[TRANSPORT_SENSE_BUFFER];
/* Completed commands list, serviced from vhost worker thread */
struct llist_node tvc_completion_list;
};
struct tcm_vhost_nexus {
/* Pointer to TCM session for I_T Nexus */
struct se_session *tvn_se_sess;
};
struct tcm_vhost_nacl {
/* Binary World Wide unique Port Name for Vhost Initiator port */
u64 iport_wwpn;
/* ASCII formatted WWPN for Sas Initiator port */
char iport_name[TCM_VHOST_NAMELEN];
/* Returned by tcm_vhost_make_nodeacl() */
struct se_node_acl se_node_acl;
};
struct vhost_scsi;
struct tcm_vhost_tpg {
/* Vhost port target portal group tag for TCM */
u16 tport_tpgt;
/* Used to track number of TPG Port/Lun Links wrt to explict I_T Nexus shutdown */
int tv_tpg_port_count;
/* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */
int tv_tpg_vhost_count;
/* list for tcm_vhost_list */
struct list_head tv_tpg_list;
/* Used to protect access for tpg_nexus */
struct mutex tv_tpg_mutex;
/* Pointer to the TCM VHost I_T Nexus for this TPG endpoint */
struct tcm_vhost_nexus *tpg_nexus;
/* Pointer back to tcm_vhost_tport */
struct tcm_vhost_tport *tport;
/* Returned by tcm_vhost_make_tpg() */
struct se_portal_group se_tpg;
/* Pointer back to vhost_scsi, protected by tv_tpg_mutex */
struct vhost_scsi *vhost_scsi;
};
struct tcm_vhost_tport {
/* SCSI protocol the tport is providing */
u8 tport_proto_id;
/* Binary World Wide unique Port Name for Vhost Target port */
u64 tport_wwpn;
/* ASCII formatted WWPN for Vhost Target port */
char tport_name[TCM_VHOST_NAMELEN];
/* Returned by tcm_vhost_make_tport() */
struct se_wwn tport_wwn;
};
struct tcm_vhost_evt {
/* event to be sent to guest */
struct virtio_scsi_event event;
/* event list, serviced from vhost worker thread */
struct llist_node list;
};
/*
* As per request from MST, keep TCM_VHOST related ioctl defines out of
* linux/vhost.h (user-space) for now..
*/
#include <linux/vhost.h>
/*
* Used by QEMU userspace to ensure a consistent vhost-scsi ABI.
*
* ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate +
* RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage
* ABI Rev 1: January 2013. Ignore vhost_tpgt filed in struct vhost_scsi_target.
* All the targets under vhost_wwpn can be seen and used by guset.
*/
#define VHOST_SCSI_ABI_VERSION 1
struct vhost_scsi_target {
int abi_version;
char vhost_wwpn[TRANSPORT_IQN_LEN];
unsigned short vhost_tpgt;
unsigned short reserved;
};
/* VHOST_SCSI specific defines */
#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target)
#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target)
/* Changing this breaks userspace. */
#define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int)
/* Set and get the events missed flag */
#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32)
#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32)
@@ -219,13 +219,20 @@ static long vhost_test_reset_owner(struct vhost_test *n)
 {
 	void *priv = NULL;
 	long err;
+	struct vhost_memory *memory;
 
 	mutex_lock(&n->dev.mutex);
 	err = vhost_dev_check_owner(&n->dev);
 	if (err)
 		goto done;
+	memory = vhost_dev_reset_owner_prepare();
+	if (!memory) {
+		err = -ENOMEM;
+		goto done;
+	}
 	vhost_test_stop(n, &priv);
 	vhost_test_flush(n);
-	err = vhost_dev_reset_owner(&n->dev);
+	vhost_dev_reset_owner(&n->dev, memory);
 done:
 	mutex_unlock(&n->dev.mutex);
 	return err;
......
...@@ -33,8 +33,6 @@ enum { ...@@ -33,8 +33,6 @@ enum {
VHOST_MEMORY_F_LOG = 0x1, VHOST_MEMORY_F_LOG = 0x1,
}; };
static unsigned vhost_zcopy_mask __read_mostly;
#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num]) #define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num])
#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num]) #define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num])
...@@ -181,8 +179,6 @@ static void vhost_vq_reset(struct vhost_dev *dev, ...@@ -181,8 +179,6 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->used_flags = 0; vq->used_flags = 0;
vq->log_used = false; vq->log_used = false;
vq->log_addr = -1ull; vq->log_addr = -1ull;
vq->vhost_hlen = 0;
vq->sock_hlen = 0;
vq->private_data = NULL; vq->private_data = NULL;
vq->log_base = NULL; vq->log_base = NULL;
vq->error_ctx = NULL; vq->error_ctx = NULL;
...@@ -191,9 +187,6 @@ static void vhost_vq_reset(struct vhost_dev *dev, ...@@ -191,9 +187,6 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->call_ctx = NULL; vq->call_ctx = NULL;
vq->call = NULL; vq->call = NULL;
vq->log_ctx = NULL; vq->log_ctx = NULL;
vq->upend_idx = 0;
vq->done_idx = 0;
vq->ubufs = NULL;
} }
static int vhost_worker(void *data) static int vhost_worker(void *data)
...@@ -253,43 +246,29 @@ static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) ...@@ -253,43 +246,29 @@ static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
vq->log = NULL; vq->log = NULL;
kfree(vq->heads); kfree(vq->heads);
vq->heads = NULL; vq->heads = NULL;
kfree(vq->ubuf_info);
vq->ubuf_info = NULL;
}
void vhost_enable_zcopy(int vq)
{
vhost_zcopy_mask |= 0x1 << vq;
} }
/* Helper to allocate iovec buffers for all vqs. */ /* Helper to allocate iovec buffers for all vqs. */
static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
{ {
int i; int i;
bool zcopy;
for (i = 0; i < dev->nvqs; ++i) { for (i = 0; i < dev->nvqs; ++i) {
dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect * dev->vqs[i]->indirect = kmalloc(sizeof *dev->vqs[i]->indirect *
UIO_MAXIOV, GFP_KERNEL); UIO_MAXIOV, GFP_KERNEL);
dev->vqs[i].log = kmalloc(sizeof *dev->vqs[i].log * UIO_MAXIOV, dev->vqs[i]->log = kmalloc(sizeof *dev->vqs[i]->log * UIO_MAXIOV,
GFP_KERNEL); GFP_KERNEL);
dev->vqs[i].heads = kmalloc(sizeof *dev->vqs[i].heads * dev->vqs[i]->heads = kmalloc(sizeof *dev->vqs[i]->heads *
UIO_MAXIOV, GFP_KERNEL); UIO_MAXIOV, GFP_KERNEL);
zcopy = vhost_zcopy_mask & (0x1 << i); if (!dev->vqs[i]->indirect || !dev->vqs[i]->log ||
if (zcopy) !dev->vqs[i]->heads)
dev->vqs[i].ubuf_info =
kmalloc(sizeof *dev->vqs[i].ubuf_info *
UIO_MAXIOV, GFP_KERNEL);
if (!dev->vqs[i].indirect || !dev->vqs[i].log ||
!dev->vqs[i].heads ||
(zcopy && !dev->vqs[i].ubuf_info))
goto err_nomem; goto err_nomem;
} }
return 0; return 0;
err_nomem: err_nomem:
for (; i >= 0; --i) for (; i >= 0; --i)
vhost_vq_free_iovecs(&dev->vqs[i]); vhost_vq_free_iovecs(dev->vqs[i]);
return -ENOMEM; return -ENOMEM;
} }
...@@ -298,11 +277,11 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev) ...@@ -298,11 +277,11 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev)
int i; int i;
for (i = 0; i < dev->nvqs; ++i) for (i = 0; i < dev->nvqs; ++i)
vhost_vq_free_iovecs(&dev->vqs[i]); vhost_vq_free_iovecs(dev->vqs[i]);
} }
long vhost_dev_init(struct vhost_dev *dev, long vhost_dev_init(struct vhost_dev *dev,
struct vhost_virtqueue *vqs, int nvqs) struct vhost_virtqueue **vqs, int nvqs)
{ {
int i; int i;
...@@ -318,16 +297,15 @@ long vhost_dev_init(struct vhost_dev *dev, ...@@ -318,16 +297,15 @@ long vhost_dev_init(struct vhost_dev *dev,
dev->worker = NULL; dev->worker = NULL;
for (i = 0; i < dev->nvqs; ++i) { for (i = 0; i < dev->nvqs; ++i) {
dev->vqs[i].log = NULL; dev->vqs[i]->log = NULL;
dev->vqs[i].indirect = NULL; dev->vqs[i]->indirect = NULL;
dev->vqs[i].heads = NULL; dev->vqs[i]->heads = NULL;
dev->vqs[i].ubuf_info = NULL; dev->vqs[i]->dev = dev;
dev->vqs[i].dev = dev; mutex_init(&dev->vqs[i]->mutex);
mutex_init(&dev->vqs[i].mutex); vhost_vq_reset(dev, dev->vqs[i]);
vhost_vq_reset(dev, dev->vqs + i); if (dev->vqs[i]->handle_kick)
if (dev->vqs[i].handle_kick) vhost_poll_init(&dev->vqs[i]->poll,
vhost_poll_init(&dev->vqs[i].poll, dev->vqs[i]->handle_kick, POLLIN, dev);
dev->vqs[i].handle_kick, POLLIN, dev);
} }
return 0; return 0;
@@ -408,21 +386,19 @@ static long vhost_dev_set_owner(struct vhost_dev *dev)
 	return err;
 }
 
-/* Caller should have device mutex */
-long vhost_dev_reset_owner(struct vhost_dev *dev)
+struct vhost_memory *vhost_dev_reset_owner_prepare(void)
 {
-	struct vhost_memory *memory;
-
-	/* Restore memory to default empty mapping. */
-	memory = kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL);
-	if (!memory)
-		return -ENOMEM;
+	return kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL);
+}
 
+/* Caller should have device mutex */
+void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory)
+{
 	vhost_dev_cleanup(dev, true);
+	/* Restore memory to default empty mapping. */
 	memory->nregions = 0;
 	RCU_INIT_POINTER(dev->memory, memory);
-	return 0;
 }
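The split exists because the old vhost_dev_reset_owner() could fail with -ENOMEM only after callers had already stopped and flushed their queues (see the RESET_OWNER hunks in net.c and test.c above). The contract is now: allocate everything that can fail first, then tear down. A hedged caller-side sketch for a hypothetical backend:

#include "vhost.h"

static long example_reset_owner(struct vhost_dev *dev)
{
	struct vhost_memory *memory;
	long err;

	mutex_lock(&dev->mutex);
	err = vhost_dev_check_owner(dev);
	if (err)
		goto done;
	memory = vhost_dev_reset_owner_prepare();	/* only step that can fail */
	if (!memory) {
		err = -ENOMEM;
		goto done;
	}
	/* Stop queues and flush outstanding work here; past this point the
	 * reset must not fail, and vhost_dev_reset_owner() is void. */
	vhost_dev_reset_owner(dev, memory);
done:
	mutex_unlock(&dev->mutex);
	return err;
}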
void vhost_dev_stop(struct vhost_dev *dev) void vhost_dev_stop(struct vhost_dev *dev)
...@@ -430,9 +406,9 @@ void vhost_dev_stop(struct vhost_dev *dev) ...@@ -430,9 +406,9 @@ void vhost_dev_stop(struct vhost_dev *dev)
int i; int i;
for (i = 0; i < dev->nvqs; ++i) { for (i = 0; i < dev->nvqs; ++i) {
if (dev->vqs[i].kick && dev->vqs[i].handle_kick) { if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) {
vhost_poll_stop(&dev->vqs[i].poll); vhost_poll_stop(&dev->vqs[i]->poll);
vhost_poll_flush(&dev->vqs[i].poll); vhost_poll_flush(&dev->vqs[i]->poll);
} }
} }
} }
...@@ -443,17 +419,17 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) ...@@ -443,17 +419,17 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked)
int i; int i;
for (i = 0; i < dev->nvqs; ++i) { for (i = 0; i < dev->nvqs; ++i) {
if (dev->vqs[i].error_ctx) if (dev->vqs[i]->error_ctx)
eventfd_ctx_put(dev->vqs[i].error_ctx); eventfd_ctx_put(dev->vqs[i]->error_ctx);
if (dev->vqs[i].error) if (dev->vqs[i]->error)
fput(dev->vqs[i].error); fput(dev->vqs[i]->error);
if (dev->vqs[i].kick) if (dev->vqs[i]->kick)
fput(dev->vqs[i].kick); fput(dev->vqs[i]->kick);
if (dev->vqs[i].call_ctx) if (dev->vqs[i]->call_ctx)
eventfd_ctx_put(dev->vqs[i].call_ctx); eventfd_ctx_put(dev->vqs[i]->call_ctx);
if (dev->vqs[i].call) if (dev->vqs[i]->call)
fput(dev->vqs[i].call); fput(dev->vqs[i]->call);
vhost_vq_reset(dev, dev->vqs + i); vhost_vq_reset(dev, dev->vqs[i]);
} }
vhost_dev_free_iovecs(dev); vhost_dev_free_iovecs(dev);
if (dev->log_ctx) if (dev->log_ctx)
...@@ -524,14 +500,14 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, ...@@ -524,14 +500,14 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
for (i = 0; i < d->nvqs; ++i) { for (i = 0; i < d->nvqs; ++i) {
int ok; int ok;
mutex_lock(&d->vqs[i].mutex); mutex_lock(&d->vqs[i]->mutex);
/* If ring is inactive, will check when it's enabled. */ /* If ring is inactive, will check when it's enabled. */
if (d->vqs[i].private_data) if (d->vqs[i]->private_data)
ok = vq_memory_access_ok(d->vqs[i].log_base, mem, ok = vq_memory_access_ok(d->vqs[i]->log_base, mem,
log_all); log_all);
else else
ok = 1; ok = 1;
mutex_unlock(&d->vqs[i].mutex); mutex_unlock(&d->vqs[i]->mutex);
if (!ok) if (!ok)
return 0; return 0;
} }
...@@ -641,7 +617,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) ...@@ -641,7 +617,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
if (idx >= d->nvqs) if (idx >= d->nvqs)
return -ENOBUFS; return -ENOBUFS;
vq = d->vqs + idx; vq = d->vqs[idx];
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
...@@ -852,7 +828,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) ...@@ -852,7 +828,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
for (i = 0; i < d->nvqs; ++i) { for (i = 0; i < d->nvqs; ++i) {
struct vhost_virtqueue *vq; struct vhost_virtqueue *vq;
void __user *base = (void __user *)(unsigned long)p; void __user *base = (void __user *)(unsigned long)p;
vq = d->vqs + i; vq = d->vqs[i];
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
/* If ring is inactive, will check when it's enabled. */ /* If ring is inactive, will check when it's enabled. */
if (vq->private_data && !vq_log_access_ok(d, vq, base)) if (vq->private_data && !vq_log_access_ok(d, vq, base))
...@@ -879,9 +855,9 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) ...@@ -879,9 +855,9 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
} else } else
filep = eventfp; filep = eventfp;
for (i = 0; i < d->nvqs; ++i) { for (i = 0; i < d->nvqs; ++i) {
mutex_lock(&d->vqs[i].mutex); mutex_lock(&d->vqs[i]->mutex);
d->vqs[i].log_ctx = d->log_ctx; d->vqs[i]->log_ctx = d->log_ctx;
mutex_unlock(&d->vqs[i].mutex); mutex_unlock(&d->vqs[i]->mutex);
} }
if (ctx) if (ctx)
eventfd_ctx_put(ctx); eventfd_ctx_put(ctx);
...@@ -1551,38 +1527,3 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) ...@@ -1551,38 +1527,3 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
&vq->used->flags, r); &vq->used->flags, r);
} }
} }
static void vhost_zerocopy_done_signal(struct kref *kref)
{
struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref,
kref);
wake_up(&ubufs->wait);
}
struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq,
bool zcopy)
{
struct vhost_ubuf_ref *ubufs;
/* No zero copy backend? Nothing to count. */
if (!zcopy)
return NULL;
ubufs = kmalloc(sizeof *ubufs, GFP_KERNEL);
if (!ubufs)
return ERR_PTR(-ENOMEM);
kref_init(&ubufs->kref);
init_waitqueue_head(&ubufs->wait);
ubufs->vq = vq;
return ubufs;
}
void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs)
{
kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
}
void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs)
{
kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
kfree(ubufs);
}
...@@ -54,18 +54,6 @@ struct vhost_log { ...@@ -54,18 +54,6 @@ struct vhost_log {
struct vhost_virtqueue; struct vhost_virtqueue;
struct vhost_ubuf_ref {
struct kref kref;
wait_queue_head_t wait;
struct vhost_virtqueue *vq;
};
struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *, bool zcopy);
void vhost_ubuf_put(struct vhost_ubuf_ref *);
void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *);
struct ubuf_info;
/* The virtqueue structure describes a queue attached to a device. */ /* The virtqueue structure describes a queue attached to a device. */
struct vhost_virtqueue { struct vhost_virtqueue {
struct vhost_dev *dev; struct vhost_dev *dev;
...@@ -114,10 +102,7 @@ struct vhost_virtqueue { ...@@ -114,10 +102,7 @@ struct vhost_virtqueue {
/* hdr is used to store the virtio header. /* hdr is used to store the virtio header.
* Since each iovec has >= 1 byte length, we never need more than * Since each iovec has >= 1 byte length, we never need more than
* header length entries to store the header. */ * header length entries to store the header. */
struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)];
struct iovec *indirect; struct iovec *indirect;
size_t vhost_hlen;
size_t sock_hlen;
struct vring_used_elem *heads; struct vring_used_elem *heads;
/* We use a kind of RCU to access private pointer. /* We use a kind of RCU to access private pointer.
* All readers access it from worker, which makes it possible to * All readers access it from worker, which makes it possible to
...@@ -130,16 +115,6 @@ struct vhost_virtqueue { ...@@ -130,16 +115,6 @@ struct vhost_virtqueue {
/* Log write descriptors */ /* Log write descriptors */
void __user *log_base; void __user *log_base;
struct vhost_log *log; struct vhost_log *log;
/* vhost zerocopy support fields below: */
/* last used idx for outstanding DMA zerocopy buffers */
int upend_idx;
/* first used idx for DMA done zerocopy buffers */
int done_idx;
/* an array of userspace buffers info */
struct ubuf_info *ubuf_info;
/* Reference counting for outstanding ubufs.
* Protected by vq mutex. Writers must also take device mutex. */
struct vhost_ubuf_ref *ubufs;
}; };
struct vhost_dev { struct vhost_dev {
...@@ -150,7 +125,7 @@ struct vhost_dev { ...@@ -150,7 +125,7 @@ struct vhost_dev {
struct mm_struct *mm; struct mm_struct *mm;
struct mutex mutex; struct mutex mutex;
unsigned acked_features; unsigned acked_features;
struct vhost_virtqueue *vqs; struct vhost_virtqueue **vqs;
int nvqs; int nvqs;
struct file *log_file; struct file *log_file;
struct eventfd_ctx *log_ctx; struct eventfd_ctx *log_ctx;
...@@ -159,9 +134,10 @@ struct vhost_dev { ...@@ -159,9 +134,10 @@ struct vhost_dev {
 	struct task_struct *worker;
 };
 
-long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs);
+long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs);
 long vhost_dev_check_owner(struct vhost_dev *);
-long vhost_dev_reset_owner(struct vhost_dev *);
+struct vhost_memory *vhost_dev_reset_owner_prepare(void);
+void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_memory *);
 void vhost_dev_cleanup(struct vhost_dev *, bool locked);
 void vhost_dev_stop(struct vhost_dev *);
 long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp);
......
...@@ -127,4 +127,32 @@ struct vhost_memory { ...@@ -127,4 +127,32 @@ struct vhost_memory {
/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ /* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */
#define VHOST_NET_F_VIRTIO_NET_HDR 27 #define VHOST_NET_F_VIRTIO_NET_HDR 27
/* VHOST_SCSI specific definitions */
/*
* Used by QEMU userspace to ensure a consistent vhost-scsi ABI.
*
* ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate +
* RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage
* ABI Rev 1: January 2013. Ignore vhost_tpgt field in struct vhost_scsi_target.
* All the targets under vhost_wwpn can be seen and used by guest.
*/
#define VHOST_SCSI_ABI_VERSION 1
struct vhost_scsi_target {
int abi_version;
char vhost_wwpn[224]; /* TRANSPORT_IQN_LEN */
unsigned short vhost_tpgt;
unsigned short reserved;
};
#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target)
#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target)
/* Changing this breaks userspace. */
#define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int)
/* Set and get the events missed flag */
#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32)
#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32)
#endif
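With the vhost-scsi ABI now exposed from linux/vhost.h, a userspace client is expected to check the ABI revision before binding a target. A hedged sketch of that sequence (the /dev/vhost-scsi node name, the WWPN string handling and the error paths are illustrative, not taken from this patch):

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

static int vhost_scsi_bind(const char *wwpn, unsigned short tpgt)
{
	struct vhost_scsi_target target;
	int fd, abi;

	fd = open("/dev/vhost-scsi", O_RDWR);
	if (fd < 0)
		return -1;
	/* Claim the device before configuring it. */
	if (ioctl(fd, VHOST_SET_OWNER, NULL) < 0)
		goto err;
	if (ioctl(fd, VHOST_SCSI_GET_ABI_VERSION, &abi) < 0 ||
	    abi > VHOST_SCSI_ABI_VERSION)
		goto err;	/* kernel speaks a newer ABI than this client */

	memset(&target, 0, sizeof(target));
	target.abi_version = VHOST_SCSI_ABI_VERSION;
	strncpy(target.vhost_wwpn, wwpn, sizeof(target.vhost_wwpn) - 1);
	target.vhost_tpgt = tpgt;	/* ignored since ABI rev 1, per the comment above */
	if (ioctl(fd, VHOST_SCSI_SET_ENDPOINT, &target) < 0)
		goto err;
	return fd;
err:
	close(fd);
	return -1;
}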