Commit 13816c76 authored by Rusty Russell

virtio_ring: virtqueue_add_sgs, to add multiple sgs.

virtio_scsi can really use this, to avoid the current hack of copying
the whole sg array.  Some other things get slightly neater, too.

This causes a slowdown in virtqueue_add_buf(), which is implemented as
a wrapper.  This is addressed in the next patches.

for i in `seq 50`; do /usr/bin/time -f 'Wall time:%e' ./vringh_test --indirect --eventidx --parallel --fast-vringh; done 2>&1 | stats --trim-outliers:

Before:
	Using CPUS 0 and 3
	Guest: notified 0, pinged 39009-39063(39062)
	Host: notified 39009-39063(39062), pinged 0
	Wall time:1.700000-1.950000(1.723542)

After:
	Using CPUS 0 and 3
	Guest: notified 0, pinged 39062-39063(39063)
	Host: notified 39062-39063(39063), pinged 0
	Wall time:1.760000-2.220000(1.789167)
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Reviewed-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Reviewed-by: Asias He <asias@redhat.com>
parent c8164d89
...@@ -98,16 +98,36 @@ struct vring_virtqueue ...@@ -98,16 +98,36 @@ struct vring_virtqueue
#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
/*
 * Iterator callback for a properly terminated (chained) scatterlist:
 * just delegate to sg_next() and ignore the remaining-entry counter,
 * since chained lists carry their own end marker.  The signature is
 * shared with sg_next_arr() so either can be plugged in as the sg
 * traversal function.
 */
static inline struct scatterlist *sg_next_chained(struct scatterlist *sg,
						  unsigned int *count)
{
	return sg_next(sg);
}
/*
 * Iterator callback for a bare scatterlist array with no end marker:
 * advance to the next adjacent element, using @remaining (entries left
 * including @sg) to detect the end of the array.
 */
static inline struct scatterlist *sg_next_arr(struct scatterlist *sg,
					      unsigned int *remaining)
{
	*remaining -= 1;
	if (*remaining == 0)
		return NULL;
	return &sg[1];
}
/* Set up an indirect table of descriptors and add it to the queue. */ /* Set up an indirect table of descriptors and add it to the queue. */
static int vring_add_indirect(struct vring_virtqueue *vq, static inline int vring_add_indirect(struct vring_virtqueue *vq,
struct scatterlist sg[], struct scatterlist *sgs[],
unsigned int out, struct scatterlist *(*next)
unsigned int in, (struct scatterlist *, unsigned int *),
gfp_t gfp) unsigned int total_sg,
unsigned int total_out,
unsigned int total_in,
unsigned int out_sgs,
unsigned int in_sgs,
gfp_t gfp)
{ {
struct vring_desc *desc; struct vring_desc *desc;
unsigned head; unsigned head;
int i; struct scatterlist *sg;
int i, n;
/* /*
* We require lowmem mappings for the descriptors because * We require lowmem mappings for the descriptors because
...@@ -116,25 +136,31 @@ static int vring_add_indirect(struct vring_virtqueue *vq, ...@@ -116,25 +136,31 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
*/ */
gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH); gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH);
desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp); desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
if (!desc) if (!desc)
return -ENOMEM; return -ENOMEM;
/* Transfer entries from the sg list into the indirect page */ /* Transfer entries from the sg lists into the indirect page */
for (i = 0; i < out; i++) { i = 0;
desc[i].flags = VRING_DESC_F_NEXT; for (n = 0; n < out_sgs; n++) {
desc[i].addr = sg_phys(sg); for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
desc[i].len = sg->length; desc[i].flags = VRING_DESC_F_NEXT;
desc[i].next = i+1; desc[i].addr = sg_phys(sg);
sg++; desc[i].len = sg->length;
desc[i].next = i+1;
i++;
}
} }
for (; i < (out + in); i++) { for (; n < (out_sgs + in_sgs); n++) {
desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
desc[i].addr = sg_phys(sg); desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
desc[i].len = sg->length; desc[i].addr = sg_phys(sg);
desc[i].next = i+1; desc[i].len = sg->length;
sg++; desc[i].next = i+1;
i++;
}
} }
BUG_ON(i != total_sg);
/* Last one doesn't continue. */ /* Last one doesn't continue. */
desc[i-1].flags &= ~VRING_DESC_F_NEXT; desc[i-1].flags &= ~VRING_DESC_F_NEXT;
...@@ -155,29 +181,20 @@ static int vring_add_indirect(struct vring_virtqueue *vq, ...@@ -155,29 +181,20 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
return head; return head;
} }
/** static inline int virtqueue_add(struct virtqueue *_vq,
* virtqueue_add_buf - expose buffer to other end struct scatterlist *sgs[],
* @vq: the struct virtqueue we're talking about. struct scatterlist *(*next)
* @sg: the description of the buffer(s). (struct scatterlist *, unsigned int *),
* @out_num: the number of sg readable by other side unsigned int total_out,
* @in_num: the number of sg which are writable (after readable ones) unsigned int total_in,
* @data: the token identifying the buffer. unsigned int out_sgs,
* @gfp: how to do memory allocations (if necessary). unsigned int in_sgs,
* void *data,
* Caller must ensure we don't call this with other virtqueue operations gfp_t gfp)
* at the same time (except where noted).
*
* Returns zero or a negative error (ie. ENOSPC, ENOMEM).
*/
int virtqueue_add_buf(struct virtqueue *_vq,
struct scatterlist sg[],
unsigned int out,
unsigned int in,
void *data,
gfp_t gfp)
{ {
struct vring_virtqueue *vq = to_vvq(_vq); struct vring_virtqueue *vq = to_vvq(_vq);
unsigned int i, avail, uninitialized_var(prev); struct scatterlist *sg;
unsigned int i, n, avail, uninitialized_var(prev), total_sg;
int head; int head;
START_USE(vq); START_USE(vq);
...@@ -197,46 +214,54 @@ int virtqueue_add_buf(struct virtqueue *_vq, ...@@ -197,46 +214,54 @@ int virtqueue_add_buf(struct virtqueue *_vq,
} }
#endif #endif
total_sg = total_in + total_out;
/* If the host supports indirect descriptor tables, and we have multiple /* If the host supports indirect descriptor tables, and we have multiple
* buffers, then go indirect. FIXME: tune this threshold */ * buffers, then go indirect. FIXME: tune this threshold */
if (vq->indirect && (out + in) > 1 && vq->vq.num_free) { if (vq->indirect && total_sg > 1 && vq->vq.num_free) {
head = vring_add_indirect(vq, sg, out, in, gfp); head = vring_add_indirect(vq, sgs, next, total_sg, total_out,
total_in,
out_sgs, in_sgs, gfp);
if (likely(head >= 0)) if (likely(head >= 0))
goto add_head; goto add_head;
} }
BUG_ON(out + in > vq->vring.num); BUG_ON(total_sg > vq->vring.num);
BUG_ON(out + in == 0); BUG_ON(total_sg == 0);
if (vq->vq.num_free < out + in) { if (vq->vq.num_free < total_sg) {
pr_debug("Can't add buf len %i - avail = %i\n", pr_debug("Can't add buf len %i - avail = %i\n",
out + in, vq->vq.num_free); total_sg, vq->vq.num_free);
/* FIXME: for historical reasons, we force a notify here if /* FIXME: for historical reasons, we force a notify here if
* there are outgoing parts to the buffer. Presumably the * there are outgoing parts to the buffer. Presumably the
* host should service the ring ASAP. */ * host should service the ring ASAP. */
if (out) if (out_sgs)
vq->notify(&vq->vq); vq->notify(&vq->vq);
END_USE(vq); END_USE(vq);
return -ENOSPC; return -ENOSPC;
} }
/* We're about to use some buffers from the free list. */ /* We're about to use some buffers from the free list. */
vq->vq.num_free -= out + in; vq->vq.num_free -= total_sg;
head = vq->free_head; head = i = vq->free_head;
for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) { for (n = 0; n < out_sgs; n++) {
vq->vring.desc[i].flags = VRING_DESC_F_NEXT; for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
vq->vring.desc[i].addr = sg_phys(sg); vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
vq->vring.desc[i].len = sg->length; vq->vring.desc[i].addr = sg_phys(sg);
prev = i; vq->vring.desc[i].len = sg->length;
sg++; prev = i;
i = vq->vring.desc[i].next;
}
} }
for (; in; i = vq->vring.desc[i].next, in--) { for (; n < (out_sgs + in_sgs); n++) {
vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
vq->vring.desc[i].addr = sg_phys(sg); vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
vq->vring.desc[i].len = sg->length; vq->vring.desc[i].addr = sg_phys(sg);
prev = i; vq->vring.desc[i].len = sg->length;
sg++; prev = i;
i = vq->vring.desc[i].next;
}
} }
/* Last one doesn't continue. */ /* Last one doesn't continue. */
vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT; vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;
...@@ -269,8 +294,77 @@ int virtqueue_add_buf(struct virtqueue *_vq, ...@@ -269,8 +294,77 @@ int virtqueue_add_buf(struct virtqueue *_vq,
return 0; return 0;
} }
/**
 * virtqueue_add_buf - expose buffer to other end
 * @_vq: the struct virtqueue we're talking about.
 * @sg: scatterlist array holding @out readable entries followed by
 *	@in writable entries.
 * @out: the number of sg entries readable by the other side.
 * @in: the number of sg entries which are writable (after readable ones).
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
 */
int virtqueue_add_buf(struct virtqueue *_vq,
		      struct scatterlist sg[],
		      unsigned int out,
		      unsigned int in,
		      void *data,
		      gfp_t gfp)
{
	/* Present the flat array as (up to) two lists: readable entries
	 * first, writable entries starting at sg + out.  The array has no
	 * end markers, so iterate it with sg_next_arr(). */
	struct scatterlist *sgs[2] = { sg, sg + out };

	return virtqueue_add(_vq, sgs, sg_next_arr,
			     out, in, out ? 1 : 0, in ? 1 : 0, data, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_buf); EXPORT_SYMBOL_GPL(virtqueue_add_buf);
/**
 * virtqueue_add_sgs - expose buffers to other end
 * @_vq: the struct virtqueue we're talking about.
 * @sgs: array of terminated scatterlists.
 * @out_sgs: the number of scatterlists readable by other side
 * @in_sgs: the number of scatterlists which are writable (after readable ones)
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
 */
int virtqueue_add_sgs(struct virtqueue *_vq,
		      struct scatterlist *sgs[],
		      unsigned int out_sgs,
		      unsigned int in_sgs,
		      void *data,
		      gfp_t gfp)
{
	unsigned int i, total_out, total_in;

	/* Count them first: virtqueue_add() needs the per-direction entry
	 * totals up front (e.g. for descriptor allocation and free-slot
	 * checks). */
	for (i = total_out = total_in = 0; i < out_sgs; i++) {
		struct scatterlist *sg;
		for (sg = sgs[i]; sg; sg = sg_next(sg))
			total_out++;
	}
	/* Remaining lists (after the readable ones) are writable. */
	for (; i < out_sgs + in_sgs; i++) {
		struct scatterlist *sg;
		for (sg = sgs[i]; sg; sg = sg_next(sg))
			total_in++;
	}
	/* These lists are sg_mark_end()-terminated, so walk them with
	 * sg_next_chained(). */
	return virtqueue_add(_vq, sgs, sg_next_chained,
			     total_out, total_in, out_sgs, in_sgs, data, gfp);
}
/** /**
* virtqueue_kick_prepare - first half of split virtqueue_kick call. * virtqueue_kick_prepare - first half of split virtqueue_kick call.
* @vq: the struct virtqueue * @vq: the struct virtqueue
......
...@@ -41,6 +41,13 @@ int virtqueue_add_buf(struct virtqueue *vq, ...@@ -41,6 +41,13 @@ int virtqueue_add_buf(struct virtqueue *vq,
void *data, void *data,
gfp_t gfp); gfp_t gfp);
int virtqueue_add_sgs(struct virtqueue *vq,
struct scatterlist *sgs[],
unsigned int out_sgs,
unsigned int in_sgs,
void *data,
gfp_t gfp);
void virtqueue_kick(struct virtqueue *vq); void virtqueue_kick(struct virtqueue *vq);
bool virtqueue_kick_prepare(struct virtqueue *vq); bool virtqueue_kick_prepare(struct virtqueue *vq);
......
...@@ -125,6 +125,22 @@ static inline void sg_mark_end(struct scatterlist *sg) ...@@ -125,6 +125,22 @@ static inline void sg_mark_end(struct scatterlist *sg)
sg->page_link &= ~0x01; sg->page_link &= ~0x01;
} }
/**
 * sg_unmark_end - Undo setting the end of the scatterlist
 * @sg: SG entry
 *
 * Description:
 *   Removes the termination marker from the given entry of the scatterlist.
 *
 **/
static inline void sg_unmark_end(struct scatterlist *sg)
{
#ifdef CONFIG_DEBUG_SG
	BUG_ON(sg->sg_magic != SG_MAGIC);
#endif
	/* Bit 0x02 of page_link is the end-of-list flag; clear it. */
	sg->page_link &= ~0x02;
}
static inline struct scatterlist *sg_next(struct scatterlist *sg) static inline struct scatterlist *sg_next(struct scatterlist *sg)
{ {
#ifdef CONFIG_DEBUG_SG #ifdef CONFIG_DEBUG_SG
......
...@@ -56,6 +56,13 @@ int virtqueue_add_buf(struct virtqueue *vq, ...@@ -56,6 +56,13 @@ int virtqueue_add_buf(struct virtqueue *vq,
void *data, void *data,
gfp_t gfp); gfp_t gfp);
int virtqueue_add_sgs(struct virtqueue *vq,
struct scatterlist *sgs[],
unsigned int out_sgs,
unsigned int in_sgs,
void *data,
gfp_t gfp);
void virtqueue_kick(struct virtqueue *vq); void virtqueue_kick(struct virtqueue *vq);
void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment