Commit 6e06c0e2 authored by David S. Miller

Merge branch 'vhost-net-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Michael S. Tsirkin says:

--------------------
There are mostly bugfixes here.
I hope to merge some more patches by 3.5, in particular
vlan support fixes are waiting for Eric's ack,
and a version of tracepoint patch might be
ready in time, but let's merge what's ready so it's testable.

This includes a ton of zerocopy fixes by Jason -
good stuff but too intrusive for 3.4 and zerocopy is experimental
anyway.

virtio has supported delayed interrupts for a while now,
so adding support to the virtio tool made sense.
--------------------
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 3a084ddb c70aa540
...@@ -505,10 +505,11 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, ...@@ -505,10 +505,11 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
if (copy > size) { if (copy > size) {
++from; ++from;
--count; --count;
} offset = 0;
} else
offset += size;
copy -= size; copy -= size;
offset1 += size; offset1 += size;
offset = 0;
} }
if (len == offset1) if (len == offset1)
...@@ -518,24 +519,29 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, ...@@ -518,24 +519,29 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
struct page *page[MAX_SKB_FRAGS]; struct page *page[MAX_SKB_FRAGS];
int num_pages; int num_pages;
unsigned long base; unsigned long base;
unsigned long truesize;
len = from->iov_len - offset1; len = from->iov_len - offset;
if (!len) { if (!len) {
offset1 = 0; offset = 0;
++from; ++from;
continue; continue;
} }
base = (unsigned long)from->iov_base + offset1; base = (unsigned long)from->iov_base + offset;
size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
if (i + size > MAX_SKB_FRAGS)
return -EMSGSIZE;
num_pages = get_user_pages_fast(base, size, 0, &page[i]); num_pages = get_user_pages_fast(base, size, 0, &page[i]);
if ((num_pages != size) || if (num_pages != size) {
(num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags)) for (i = 0; i < num_pages; i++)
/* put_page is in skb free */ put_page(page[i]);
return -EFAULT; return -EFAULT;
}
truesize = size * PAGE_SIZE;
skb->data_len += len; skb->data_len += len;
skb->len += len; skb->len += len;
skb->truesize += len; skb->truesize += truesize;
atomic_add(len, &skb->sk->sk_wmem_alloc); atomic_add(truesize, &skb->sk->sk_wmem_alloc);
while (len) { while (len) {
int off = base & ~PAGE_MASK; int off = base & ~PAGE_MASK;
int size = min_t(int, len, PAGE_SIZE - off); int size = min_t(int, len, PAGE_SIZE - off);
...@@ -546,7 +552,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, ...@@ -546,7 +552,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
len -= size; len -= size;
i++; i++;
} }
offset1 = 0; offset = 0;
++from; ++from;
} }
return 0; return 0;
...@@ -646,7 +652,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, ...@@ -646,7 +652,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
int err; int err;
struct virtio_net_hdr vnet_hdr = { 0 }; struct virtio_net_hdr vnet_hdr = { 0 };
int vnet_hdr_len = 0; int vnet_hdr_len = 0;
int copylen; int copylen = 0;
bool zerocopy = false; bool zerocopy = false;
if (q->flags & IFF_VNET_HDR) { if (q->flags & IFF_VNET_HDR) {
...@@ -675,15 +681,31 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, ...@@ -675,15 +681,31 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
if (unlikely(len < ETH_HLEN)) if (unlikely(len < ETH_HLEN))
goto err; goto err;
err = -EMSGSIZE;
if (unlikely(count > UIO_MAXIOV))
goto err;
if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY))
zerocopy = true; zerocopy = true;
if (zerocopy) { if (zerocopy) {
/* Userspace may produce vectors with count greater than
* MAX_SKB_FRAGS, so we need to linearize parts of the skb
* to let the rest of data to be fit in the frags.
*/
if (count > MAX_SKB_FRAGS) {
copylen = iov_length(iv, count - MAX_SKB_FRAGS);
if (copylen < vnet_hdr_len)
copylen = 0;
else
copylen -= vnet_hdr_len;
}
/* There are 256 bytes to be copied in skb, so there is enough /* There are 256 bytes to be copied in skb, so there is enough
* room for skb expand head in case it is used. * room for skb expand head in case it is used.
* The rest buffer is mapped from userspace. * The rest buffer is mapped from userspace.
*/ */
copylen = vnet_hdr.hdr_len; if (copylen < vnet_hdr.hdr_len)
copylen = vnet_hdr.hdr_len;
if (!copylen) if (!copylen)
copylen = GOODCOPY_LEN; copylen = GOODCOPY_LEN;
} else } else
...@@ -694,10 +716,9 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, ...@@ -694,10 +716,9 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
if (!skb) if (!skb)
goto err; goto err;
if (zerocopy) { if (zerocopy)
err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count); err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count);
skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; else
} else
err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len,
len); len);
if (err) if (err)
...@@ -716,8 +737,10 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, ...@@ -716,8 +737,10 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
rcu_read_lock_bh(); rcu_read_lock_bh();
vlan = rcu_dereference_bh(q->vlan); vlan = rcu_dereference_bh(q->vlan);
/* copy skb_ubuf_info for callback when skb has no error */ /* copy skb_ubuf_info for callback when skb has no error */
if (zerocopy) if (zerocopy) {
skb_shinfo(skb)->destructor_arg = m->msg_control; skb_shinfo(skb)->destructor_arg = m->msg_control;
skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
}
if (vlan) if (vlan)
macvlan_start_xmit(skb, vlan->dev); macvlan_start_xmit(skb, vlan->dev);
else else
......
...@@ -166,7 +166,7 @@ static void handle_tx(struct vhost_net *net) ...@@ -166,7 +166,7 @@ static void handle_tx(struct vhost_net *net)
if (wmem < sock->sk->sk_sndbuf / 2) if (wmem < sock->sk->sk_sndbuf / 2)
tx_poll_stop(net); tx_poll_stop(net);
hdr_size = vq->vhost_hlen; hdr_size = vq->vhost_hlen;
zcopy = vhost_sock_zcopy(sock); zcopy = vq->ubufs;
for (;;) { for (;;) {
/* Release DMAs done buffers first */ /* Release DMAs done buffers first */
...@@ -257,7 +257,8 @@ static void handle_tx(struct vhost_net *net) ...@@ -257,7 +257,8 @@ static void handle_tx(struct vhost_net *net)
UIO_MAXIOV; UIO_MAXIOV;
} }
vhost_discard_vq_desc(vq, 1); vhost_discard_vq_desc(vq, 1);
tx_poll_start(net, sock); if (err == -EAGAIN || err == -ENOBUFS)
tx_poll_start(net, sock);
break; break;
} }
if (err != len) if (err != len)
...@@ -265,6 +266,8 @@ static void handle_tx(struct vhost_net *net) ...@@ -265,6 +266,8 @@ static void handle_tx(struct vhost_net *net)
" len %d != %zd\n", err, len); " len %d != %zd\n", err, len);
if (!zcopy) if (!zcopy)
vhost_add_used_and_signal(&net->dev, vq, head, 0); vhost_add_used_and_signal(&net->dev, vq, head, 0);
else
vhost_zerocopy_signal_used(vq);
total_len += len; total_len += len;
if (unlikely(total_len >= VHOST_NET_WEIGHT)) { if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
vhost_poll_queue(&vq->poll); vhost_poll_queue(&vq->poll);
......
...@@ -1603,6 +1603,7 @@ void vhost_zerocopy_callback(struct ubuf_info *ubuf) ...@@ -1603,6 +1603,7 @@ void vhost_zerocopy_callback(struct ubuf_info *ubuf)
struct vhost_ubuf_ref *ubufs = ubuf->ctx; struct vhost_ubuf_ref *ubufs = ubuf->ctx;
struct vhost_virtqueue *vq = ubufs->vq; struct vhost_virtqueue *vq = ubufs->vq;
vhost_poll_queue(&vq->poll);
/* set len = 1 to mark this desc buffers done DMA */ /* set len = 1 to mark this desc buffers done DMA */
vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN; vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
kref_put(&ubufs->kref, vhost_zerocopy_done_signal); kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
......
...@@ -203,6 +203,7 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); ...@@ -203,6 +203,7 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
void virtqueue_disable_cb(struct virtqueue *vq); void virtqueue_disable_cb(struct virtqueue *vq);
bool virtqueue_enable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq);
bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
void *virtqueue_detach_unused_buf(struct virtqueue *vq); void *virtqueue_detach_unused_buf(struct virtqueue *vq);
struct virtqueue *vring_new_virtqueue(unsigned int num, struct virtqueue *vring_new_virtqueue(unsigned int num,
......
...@@ -144,7 +144,8 @@ static void wait_for_interrupt(struct vdev_info *dev) ...@@ -144,7 +144,8 @@ static void wait_for_interrupt(struct vdev_info *dev)
} }
} }
static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs) static void run_test(struct vdev_info *dev, struct vq_info *vq,
bool delayed, int bufs)
{ {
struct scatterlist sl; struct scatterlist sl;
long started = 0, completed = 0; long started = 0, completed = 0;
...@@ -183,8 +184,12 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs) ...@@ -183,8 +184,12 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs)
assert(started <= bufs); assert(started <= bufs);
if (completed == bufs) if (completed == bufs)
break; break;
if (virtqueue_enable_cb(vq->vq)) { if (delayed) {
wait_for_interrupt(dev); if (virtqueue_enable_cb_delayed(vq->vq))
wait_for_interrupt(dev);
} else {
if (virtqueue_enable_cb(vq->vq))
wait_for_interrupt(dev);
} }
} }
test = 0; test = 0;
...@@ -215,6 +220,14 @@ const struct option longopts[] = { ...@@ -215,6 +220,14 @@ const struct option longopts[] = {
.name = "no-indirect", .name = "no-indirect",
.val = 'i', .val = 'i',
}, },
{
.name = "delayed-interrupt",
.val = 'D',
},
{
.name = "no-delayed-interrupt",
.val = 'd',
},
{ {
} }
}; };
...@@ -224,6 +237,7 @@ static void help() ...@@ -224,6 +237,7 @@ static void help()
fprintf(stderr, "Usage: virtio_test [--help]" fprintf(stderr, "Usage: virtio_test [--help]"
" [--no-indirect]" " [--no-indirect]"
" [--no-event-idx]" " [--no-event-idx]"
" [--delayed-interrupt]"
"\n"); "\n");
} }
...@@ -233,6 +247,7 @@ int main(int argc, char **argv) ...@@ -233,6 +247,7 @@ int main(int argc, char **argv)
unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
(1ULL << VIRTIO_RING_F_EVENT_IDX); (1ULL << VIRTIO_RING_F_EVENT_IDX);
int o; int o;
bool delayed = false;
for (;;) { for (;;) {
o = getopt_long(argc, argv, optstring, longopts, NULL); o = getopt_long(argc, argv, optstring, longopts, NULL);
...@@ -251,6 +266,9 @@ int main(int argc, char **argv) ...@@ -251,6 +266,9 @@ int main(int argc, char **argv)
case 'i': case 'i':
features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC); features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
break; break;
case 'D':
delayed = true;
break;
default: default:
assert(0); assert(0);
break; break;
...@@ -260,6 +278,6 @@ int main(int argc, char **argv) ...@@ -260,6 +278,6 @@ int main(int argc, char **argv)
done: done:
vdev_info_init(&dev, features); vdev_info_init(&dev, features);
vq_info_add(&dev, 256); vq_info_add(&dev, 256);
run_test(&dev, &dev.vqs[0], 0x100000); run_test(&dev, &dev.vqs[0], delayed, 0x100000);
return 0; return 0;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment