Commit 72b5e895, authored by Jason Wang, committed by Michael S. Tsirkin

virtio-ring: store DMA metadata in desc_extra for split virtqueue

For split virtqueue, we used to depend on the address, length and
flags stored in the descriptor ring for DMA unmapping. This is unsafe,
since the device can alter those fields and thereby manipulate the
behavior of the virtio driver, IOMMU drivers and swiotlb.

For safety, maintain the DMA address, DMA length, descriptor flags and
next field of the non-indirect descriptors in vring_desc_state_extra
when the DMA API is used for virtio, as we did for packed virtqueue, and
use that metadata for performing DMA operations. Indirect descriptors
should be safe since they are using streaming mappings.

With this, the descriptor ring is write-only from the view of the
driver.

This slightly increases the footprint of the driver, but the difference
is not noticeable in pktgen (64B) and netperf tests in the case of
virtio-net.
Signed-off-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/r/20210604055350.58753-8-jasowang@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
parent 5bc72234
...@@ -133,6 +133,7 @@ struct vring_virtqueue { ...@@ -133,6 +133,7 @@ struct vring_virtqueue {
/* Per-descriptor state. */ /* Per-descriptor state. */
struct vring_desc_state_split *desc_state; struct vring_desc_state_split *desc_state;
struct vring_desc_extra *desc_extra;
/* DMA address and size information */ /* DMA address and size information */
dma_addr_t queue_dma_addr; dma_addr_t queue_dma_addr;
...@@ -367,7 +368,7 @@ static int vring_mapping_error(const struct vring_virtqueue *vq, ...@@ -367,7 +368,7 @@ static int vring_mapping_error(const struct vring_virtqueue *vq,
* Split ring specific functions - *_split(). * Split ring specific functions - *_split().
*/ */
static void vring_unmap_one_split(const struct vring_virtqueue *vq, static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
struct vring_desc *desc) struct vring_desc *desc)
{ {
u16 flags; u16 flags;
...@@ -392,6 +393,35 @@ static void vring_unmap_one_split(const struct vring_virtqueue *vq, ...@@ -392,6 +393,35 @@ static void vring_unmap_one_split(const struct vring_virtqueue *vq,
} }
} }
static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
unsigned int i)
{
struct vring_desc_extra *extra = vq->split.desc_extra;
u16 flags;
if (!vq->use_dma_api)
goto out;
flags = extra[i].flags;
if (flags & VRING_DESC_F_INDIRECT) {
dma_unmap_single(vring_dma_dev(vq),
extra[i].addr,
extra[i].len,
(flags & VRING_DESC_F_WRITE) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
} else {
dma_unmap_page(vring_dma_dev(vq),
extra[i].addr,
extra[i].len,
(flags & VRING_DESC_F_WRITE) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
}
out:
return extra[i].next;
}
static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
unsigned int total_sg, unsigned int total_sg,
gfp_t gfp) gfp_t gfp)
...@@ -420,13 +450,28 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq, ...@@ -420,13 +450,28 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
unsigned int i, unsigned int i,
dma_addr_t addr, dma_addr_t addr,
unsigned int len, unsigned int len,
u16 flags) u16 flags,
bool indirect)
{ {
struct vring_virtqueue *vring = to_vvq(vq);
struct vring_desc_extra *extra = vring->split.desc_extra;
u16 next;
desc[i].flags = cpu_to_virtio16(vq->vdev, flags); desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
desc[i].addr = cpu_to_virtio64(vq->vdev, addr); desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
desc[i].len = cpu_to_virtio32(vq->vdev, len); desc[i].len = cpu_to_virtio32(vq->vdev, len);
return virtio16_to_cpu(vq->vdev, desc[i].next); if (!indirect) {
next = extra[i].next;
desc[i].next = cpu_to_virtio16(vq->vdev, next);
extra[i].addr = addr;
extra[i].len = len;
extra[i].flags = flags;
} else
next = virtio16_to_cpu(vq->vdev, desc[i].next);
return next;
} }
static inline int virtqueue_add_split(struct virtqueue *_vq, static inline int virtqueue_add_split(struct virtqueue *_vq,
...@@ -502,8 +547,12 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, ...@@ -502,8 +547,12 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
goto unmap_release; goto unmap_release;
prev = i; prev = i;
/* Note that we trust indirect descriptor
* table since it use stream DMA mapping.
*/
i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length, i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
VRING_DESC_F_NEXT); VRING_DESC_F_NEXT,
indirect);
} }
} }
for (; n < (out_sgs + in_sgs); n++) { for (; n < (out_sgs + in_sgs); n++) {
...@@ -513,14 +562,21 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, ...@@ -513,14 +562,21 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
goto unmap_release; goto unmap_release;
prev = i; prev = i;
/* Note that we trust indirect descriptor
* table since it use stream DMA mapping.
*/
i = virtqueue_add_desc_split(_vq, desc, i, addr, i = virtqueue_add_desc_split(_vq, desc, i, addr,
sg->length, sg->length,
VRING_DESC_F_NEXT | VRING_DESC_F_NEXT |
VRING_DESC_F_WRITE); VRING_DESC_F_WRITE,
indirect);
} }
} }
/* Last one doesn't continue. */ /* Last one doesn't continue. */
desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
if (!indirect && vq->use_dma_api)
vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags =
~VRING_DESC_F_NEXT;
if (indirect) { if (indirect) {
/* Now that the indirect table is filled in, map it. */ /* Now that the indirect table is filled in, map it. */
...@@ -533,7 +589,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, ...@@ -533,7 +589,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
virtqueue_add_desc_split(_vq, vq->split.vring.desc, virtqueue_add_desc_split(_vq, vq->split.vring.desc,
head, addr, head, addr,
total_sg * sizeof(struct vring_desc), total_sg * sizeof(struct vring_desc),
VRING_DESC_F_INDIRECT); VRING_DESC_F_INDIRECT,
false);
} }
/* We're using some buffers from the free list. */ /* We're using some buffers from the free list. */
...@@ -541,8 +598,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, ...@@ -541,8 +598,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
/* Update free pointer */ /* Update free pointer */
if (indirect) if (indirect)
vq->free_head = virtio16_to_cpu(_vq->vdev, vq->free_head = vq->split.desc_extra[head].next;
vq->split.vring.desc[head].next);
else else
vq->free_head = i; vq->free_head = i;
...@@ -587,8 +643,11 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, ...@@ -587,8 +643,11 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
for (n = 0; n < total_sg; n++) { for (n = 0; n < total_sg; n++) {
if (i == err_idx) if (i == err_idx)
break; break;
vring_unmap_one_split(vq, &desc[i]); if (indirect) {
vring_unmap_one_split_indirect(vq, &desc[i]);
i = virtio16_to_cpu(_vq->vdev, desc[i].next); i = virtio16_to_cpu(_vq->vdev, desc[i].next);
} else
i = vring_unmap_one_split(vq, i);
} }
if (indirect) if (indirect)
...@@ -642,14 +701,13 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, ...@@ -642,14 +701,13 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
i = head; i = head;
while (vq->split.vring.desc[i].flags & nextflag) { while (vq->split.vring.desc[i].flags & nextflag) {
vring_unmap_one_split(vq, &vq->split.vring.desc[i]); vring_unmap_one_split(vq, i);
i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next); i = vq->split.desc_extra[i].next;
vq->vq.num_free++; vq->vq.num_free++;
} }
vring_unmap_one_split(vq, &vq->split.vring.desc[i]); vring_unmap_one_split(vq, i);
vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->split.desc_extra[i].next = vq->free_head;
vq->free_head);
vq->free_head = head; vq->free_head = head;
/* Plus final descriptor */ /* Plus final descriptor */
...@@ -664,15 +722,14 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, ...@@ -664,15 +722,14 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
if (!indir_desc) if (!indir_desc)
return; return;
len = virtio32_to_cpu(vq->vq.vdev, len = vq->split.desc_extra[head].len;
vq->split.vring.desc[head].len);
BUG_ON(!(vq->split.vring.desc[head].flags & BUG_ON(!(vq->split.desc_extra[head].flags &
cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); VRING_DESC_F_INDIRECT));
BUG_ON(len == 0 || len % sizeof(struct vring_desc)); BUG_ON(len == 0 || len % sizeof(struct vring_desc));
for (j = 0; j < len / sizeof(struct vring_desc); j++) for (j = 0; j < len / sizeof(struct vring_desc); j++)
vring_unmap_one_split(vq, &indir_desc[j]); vring_unmap_one_split_indirect(vq, &indir_desc[j]);
kfree(indir_desc); kfree(indir_desc);
vq->split.desc_state[head].indir_desc = NULL; vq->split.desc_state[head].indir_desc = NULL;
...@@ -2108,7 +2165,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, ...@@ -2108,7 +2165,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
void (*callback)(struct virtqueue *), void (*callback)(struct virtqueue *),
const char *name) const char *name)
{ {
unsigned int i;
struct vring_virtqueue *vq; struct vring_virtqueue *vq;
if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
...@@ -2164,16 +2220,20 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, ...@@ -2164,16 +2220,20 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
if (!vq->split.desc_state) if (!vq->split.desc_state)
goto err_state; goto err_state;
vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num);
if (!vq->split.desc_extra)
goto err_extra;
/* Put everything in free lists. */ /* Put everything in free lists. */
vq->free_head = 0; vq->free_head = 0;
for (i = 0; i < vring.num-1; i++)
vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
memset(vq->split.desc_state, 0, vring.num * memset(vq->split.desc_state, 0, vring.num *
sizeof(struct vring_desc_state_split)); sizeof(struct vring_desc_state_split));
list_add_tail(&vq->vq.list, &vdev->vqs); list_add_tail(&vq->vq.list, &vdev->vqs);
return &vq->vq; return &vq->vq;
err_extra:
kfree(vq->split.desc_state);
err_state: err_state:
kfree(vq); kfree(vq);
return NULL; return NULL;
...@@ -2257,8 +2317,10 @@ void vring_del_virtqueue(struct virtqueue *_vq) ...@@ -2257,8 +2317,10 @@ void vring_del_virtqueue(struct virtqueue *_vq)
vq->split.queue_dma_addr); vq->split.queue_dma_addr);
} }
} }
if (!vq->packed_ring) if (!vq->packed_ring) {
kfree(vq->split.desc_state); kfree(vq->split.desc_state);
kfree(vq->split.desc_extra);
}
list_del(&_vq->list); list_del(&_vq->list);
kfree(vq); kfree(vq);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment