Commit 7a53e17a authored by Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:

 - A huge patchset supporting vq resize using the new vq reset
   capability

 - Features, fixes, and cleanups all over the place

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (88 commits)
  vdpa/mlx5: Fix possible uninitialized return value
  vdpa_sim_blk: add support for discard and write-zeroes
  vdpa_sim_blk: add support for VIRTIO_BLK_T_FLUSH
  vdpa_sim_blk: make vdpasim_blk_check_range usable by other requests
  vdpa_sim_blk: check if sector is 0 for commands other than read or write
  vdpa_sim: Implement suspend vdpa op
  vhost-vdpa: uAPI to suspend the device
  vhost-vdpa: introduce SUSPEND backend feature bit
  vdpa: Add suspend operation
  virtio-blk: Avoid use-after-free on suspend/resume
  virtio_vdpa: support the arg sizes of find_vqs()
  vhost-vdpa: Call ida_simple_remove() when failed
  vDPA: fix 'cast to restricted le16' warnings in vdpa.c
  vDPA: !FEATURES_OK should not block querying device config space
  vDPA/ifcvf: support userspace to query features and MQ of a management device
  vDPA/ifcvf: get_config_size should return a value no greater than dev implementation
  vhost scsi: Allow user to control num virtqueues
  vhost-scsi: Fix max number of virtqueues
  vdpa/mlx5: Support different address spaces for control and data
  vdpa/mlx5: Implement susupend virtqueue callback
  ...
parents 999324f5 93e530d2
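
The transport-facing theme that repeats through the diffs below is small and uniform: the find_vqs() callback grows a u32 sizes[] argument so callers can request per-virtqueue ring sizes, and every transport records the ring size it actually allocated in vq->num_max, which is the bound the new resize support checks against. A minimal sketch of that shape, assuming hypothetical my_find_vqs()/my_create_ring() helpers (only vq->num_max, the sizes[] parameter and virtqueue_get_vring_size() come from the patches themselves):

#include <linux/err.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>

/*
 * Illustrative only -- the shape the transports below converge on.
 * my_create_ring() is a hypothetical stand-in for a transport's ring
 * allocation helper; the real ones appear in the per-file hunks.
 */
static int my_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
		       const char * const names[], u32 sizes[],
		       const bool *ctx, struct irq_affinity *desc)
{
	unsigned int i;

	for (i = 0; i < nvqs; i++) {
		/* sizes[i] == 0 (or sizes == NULL) means "transport default" */
		struct virtqueue *vq = my_create_ring(vdev, i,
						      sizes ? sizes[i] : 0,
						      callbacks[i], names[i],
						      ctx ? ctx[i] : false);
		if (IS_ERR(vq))
			return PTR_ERR(vq);

		/* record the upper bound the core may later resize within */
		vq->num_max = virtqueue_get_vring_size(vq);
		vqs[i] = vq;
	}
	return 0;
}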
@@ -33,6 +33,10 @@ properties:
     description: Required for devices making accesses thru an IOMMU.
     maxItems: 1
+  wakeup-source:
+    type: boolean
+    description: Required for setting irq of a virtio_mmio device as wakeup source.
 required:
   - compatible
   - reg
......
@@ -958,6 +958,7 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
 		goto error_create;
 	}
 	vq->priv = info;
+	vq->num_max = num;
 	num = virtqueue_get_vring_size(vq);
 	if (vu_dev->protocol_features &
@@ -1010,7 +1011,7 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
 static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
-		       const char * const names[], const bool *ctx,
+		       const char * const names[], u32 sizes[], const bool *ctx,
 		       struct irq_affinity *desc)
 {
 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
......
@@ -101,6 +101,14 @@ static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
 	}
 }
+static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
+{
+	struct virtio_blk *vblk = hctx->queue->queuedata;
+	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
+	return vq;
+}
 static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
 {
 	struct scatterlist hdr, status, *sgs[3];
@@ -416,7 +424,7 @@ static void virtio_queue_rqs(struct request **rqlist)
 	struct request *requeue_list = NULL;
 	rq_list_for_each_safe(rqlist, req, next) {
-		struct virtio_blk_vq *vq = req->mq_hctx->driver_data;
+		struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
 		bool kick;
 		if (!virtblk_prep_rq_batch(req)) {
@@ -837,7 +845,7 @@ static void virtblk_complete_batch(struct io_comp_batch *iob)
 static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
 {
 	struct virtio_blk *vblk = hctx->queue->queuedata;
-	struct virtio_blk_vq *vq = hctx->driver_data;
+	struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
 	struct virtblk_req *vbr;
 	unsigned long flags;
 	unsigned int len;
@@ -862,22 +870,10 @@ static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
 	return found;
 }
-static int virtblk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
-			     unsigned int hctx_idx)
-{
-	struct virtio_blk *vblk = data;
-	struct virtio_blk_vq *vq = &vblk->vqs[hctx_idx];
-	WARN_ON(vblk->tag_set.tags[hctx_idx] != hctx->tags);
-	hctx->driver_data = vq;
-	return 0;
-}
 static const struct blk_mq_ops virtio_mq_ops = {
 	.queue_rq = virtio_queue_rq,
 	.queue_rqs = virtio_queue_rqs,
 	.commit_rqs = virtio_commit_rqs,
-	.init_hctx = virtblk_init_hctx,
 	.complete = virtblk_request_done,
 	.map_queues = virtblk_map_queues,
 	.poll = virtblk_poll,
......
This diff is collapsed.
@@ -81,17 +81,24 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
 	ndr_desc.res = &res;
 	ndr_desc.numa_node = nid;
 	ndr_desc.flush = async_pmem_flush;
+	ndr_desc.provider_data = vdev;
 	set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
 	set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
+	/*
+	 * The NVDIMM region could be available before the
+	 * virtio_device_ready() that is called by
+	 * virtio_dev_probe(), so we set device ready here.
+	 */
+	virtio_device_ready(vdev);
 	nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc);
 	if (!nd_region) {
 		dev_err(&vdev->dev, "failed to create nvdimm region\n");
 		err = -ENXIO;
 		goto out_nd;
 	}
-	nd_region->provider_data = dev_to_virtio(nd_region->dev.parent->parent);
 	return 0;
 out_nd:
+	virtio_reset_device(vdev);
 	nvdimm_bus_unregister(vpmem->nvdimm_bus);
 out_vq:
 	vdev->config->del_vqs(vdev);
......
@@ -928,6 +928,7 @@ static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
 					struct virtqueue *vqs[],
 					vq_callback_t *callbacks[],
 					const char * const names[],
+					u32 sizes[],
 					const bool *ctx,
 					struct irq_affinity *desc)
 {
@@ -959,6 +960,8 @@ static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
 			goto error;
 		}
+		vq->num_max = vring->num;
 		vqs[i] = vq;
 		vring->vq = vq;
 		vq->priv = vring;
......
@@ -335,7 +335,7 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i)
 	size_t size;
 	/* actual size of vring (in bytes) */
-	size = PAGE_ALIGN(vring_size(rvring->len, rvring->align));
+	size = PAGE_ALIGN(vring_size(rvring->num, rvring->align));
 	rsc = (void *)rproc->table_ptr + rvdev->rsc_offset;
@@ -402,7 +402,7 @@ rproc_parse_vring(struct rproc_vdev *rvdev, struct fw_rsc_vdev *rsc, int i)
 		return -EINVAL;
 	}
-	rvring->len = vring->num;
+	rvring->num = vring->num;
 	rvring->align = vring->align;
 	rvring->rvdev = rvdev;
......
@@ -87,7 +87,7 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
 	struct fw_rsc_vdev *rsc;
 	struct virtqueue *vq;
 	void *addr;
-	int len, size;
+	int num, size;
 	/* we're temporarily limited to two virtqueues per rvdev */
 	if (id >= ARRAY_SIZE(rvdev->vring))
@@ -104,20 +104,20 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
 	rvring = &rvdev->vring[id];
 	addr = mem->va;
-	len = rvring->len;
+	num = rvring->num;
 	/* zero vring */
-	size = vring_size(len, rvring->align);
+	size = vring_size(num, rvring->align);
 	memset(addr, 0, size);
 	dev_dbg(dev, "vring%d: va %pK qsz %d notifyid %d\n",
-		id, addr, len, rvring->notifyid);
+		id, addr, num, rvring->notifyid);
 	/*
 	 * Create the new vq, and tell virtio we're not interested in
 	 * the 'weak' smp barriers, since we're talking with a real device.
 	 */
-	vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, ctx,
+	vq = vring_new_virtqueue(id, num, rvring->align, vdev, false, ctx,
 				 addr, rproc_virtio_notify, callback, name);
 	if (!vq) {
 		dev_err(dev, "vring_new_virtqueue %s failed\n", name);
@@ -125,6 +125,8 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
 		return ERR_PTR(-ENOMEM);
 	}
+	vq->num_max = num;
 	rvring->vq = vq;
 	vq->priv = rvring;
@@ -156,6 +158,7 @@ static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
 				 struct virtqueue *vqs[],
 				 vq_callback_t *callbacks[],
 				 const char * const names[],
+				 u32 sizes[],
 				 const bool * ctx,
 				 struct irq_affinity *desc)
 {
......
@@ -532,6 +532,9 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
 		err = -ENOMEM;
 		goto out_err;
 	}
+	vq->num_max = info->num;
 	/* it may have been reduced */
 	info->num = virtqueue_get_vring_size(vq);
@@ -634,6 +637,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 			       struct virtqueue *vqs[],
 			       vq_callback_t *callbacks[],
 			       const char * const names[],
+			       u32 sizes[],
 			       const bool *ctx,
 			       struct irq_affinity *desc)
 {
......
@@ -29,7 +29,6 @@ u16 ifcvf_set_config_vector(struct ifcvf_hw *hw, int vector)
 {
 	struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
-	cfg = hw->common_cfg;
 	vp_iowrite16(vector, &cfg->msix_config);
 	return vp_ioread16(&cfg->msix_config);
@@ -128,6 +127,7 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev)
 			break;
 		case VIRTIO_PCI_CAP_DEVICE_CFG:
 			hw->dev_cfg = get_cap_addr(hw, &cap);
+			hw->cap_dev_config_size = le32_to_cpu(cap.length);
 			IFCVF_DBG(pdev, "hw->dev_cfg = %p\n", hw->dev_cfg);
 			break;
 		}
@@ -233,15 +233,23 @@ int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features)
 u32 ifcvf_get_config_size(struct ifcvf_hw *hw)
 {
 	struct ifcvf_adapter *adapter;
+	u32 net_config_size = sizeof(struct virtio_net_config);
+	u32 blk_config_size = sizeof(struct virtio_blk_config);
+	u32 cap_size = hw->cap_dev_config_size;
 	u32 config_size;
 	adapter = vf_to_adapter(hw);
+	/* If the onboard device config space size is greater than
+	 * the size of struct virtio_net/blk_config, only the spec
+	 * implementing contents size is returned, this is very
+	 * unlikely, defensive programming.
+	 */
 	switch (hw->dev_type) {
 	case VIRTIO_ID_NET:
-		config_size = sizeof(struct virtio_net_config);
+		config_size = min(cap_size, net_config_size);
 		break;
 	case VIRTIO_ID_BLOCK:
-		config_size = sizeof(struct virtio_blk_config);
+		config_size = min(cap_size, blk_config_size);
 		break;
 	default:
 		config_size = 0;
......
@@ -87,6 +87,8 @@ struct ifcvf_hw {
 	int config_irq;
 	int vqs_reused_irq;
 	u16 nr_vring;
+	/* VIRTIO_PCI_CAP_DEVICE_CFG size */
+	u32 cap_dev_config_size;
 };
 struct ifcvf_adapter {
......
@@ -685,7 +685,7 @@ static struct vdpa_notification_area ifcvf_get_vq_notification(struct vdpa_devic
 }
 /*
- * IFCVF currently does't have on-chip IOMMU, so not
+ * IFCVF currently doesn't have on-chip IOMMU, so not
  * implemented set_map()/dma_map()/dma_unmap()
  */
 static const struct vdpa_config_ops ifc_vdpa_ops = {
@@ -752,59 +752,36 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
 {
 	struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
 	struct ifcvf_adapter *adapter;
+	struct vdpa_device *vdpa_dev;
 	struct pci_dev *pdev;
 	struct ifcvf_hw *vf;
-	struct device *dev;
-	int ret, i;
+	int ret;
 	ifcvf_mgmt_dev = container_of(mdev, struct ifcvf_vdpa_mgmt_dev, mdev);
-	if (ifcvf_mgmt_dev->adapter)
+	if (!ifcvf_mgmt_dev->adapter)
 		return -EOPNOTSUPP;
-	pdev = ifcvf_mgmt_dev->pdev;
-	dev = &pdev->dev;
-	adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
-				    dev, &ifc_vdpa_ops, 1, 1, name, false);
-	if (IS_ERR(adapter)) {
-		IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
-		return PTR_ERR(adapter);
-	}
-	ifcvf_mgmt_dev->adapter = adapter;
+	adapter = ifcvf_mgmt_dev->adapter;
 	vf = &adapter->vf;
-	vf->dev_type = get_dev_type(pdev);
-	vf->base = pcim_iomap_table(pdev);
-	adapter->pdev = pdev;
-	adapter->vdpa.dma_dev = &pdev->dev;
-	ret = ifcvf_init_hw(vf, pdev);
-	if (ret) {
-		IFCVF_ERR(pdev, "Failed to init IFCVF hw\n");
-		goto err;
-	}
-	for (i = 0; i < vf->nr_vring; i++)
-		vf->vring[i].irq = -EINVAL;
-	vf->hw_features = ifcvf_get_hw_features(vf);
-	vf->config_size = ifcvf_get_config_size(vf);
-	adapter->vdpa.mdev = &ifcvf_mgmt_dev->mdev;
+	pdev = adapter->pdev;
+	vdpa_dev = &adapter->vdpa;
+	if (name)
+		ret = dev_set_name(&vdpa_dev->dev, "%s", name);
+	else
+		ret = dev_set_name(&vdpa_dev->dev, "vdpa%u", vdpa_dev->index);
 	ret = _vdpa_register_device(&adapter->vdpa, vf->nr_vring);
 	if (ret) {
-		put_device(&adapter->vdpa.dev);
 		IFCVF_ERR(pdev, "Failed to register to vDPA bus");
-		goto err;
+		return ret;
 	}
 	return 0;
-err:
-	put_device(&adapter->vdpa.dev);
-	return ret;
 }
 static void ifcvf_vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
 {
 	struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
@@ -823,61 +800,94 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
 	struct device *dev = &pdev->dev;
+	struct ifcvf_adapter *adapter;
+	struct ifcvf_hw *vf;
 	u32 dev_type;
-	int ret;
+	int ret, i;
-	ifcvf_mgmt_dev = kzalloc(sizeof(struct ifcvf_vdpa_mgmt_dev), GFP_KERNEL);
-	if (!ifcvf_mgmt_dev) {
-		IFCVF_ERR(pdev, "Failed to alloc memory for the vDPA management device\n");
-		return -ENOMEM;
-	}
-	dev_type = get_dev_type(pdev);
-	switch (dev_type) {
-	case VIRTIO_ID_NET:
-		ifcvf_mgmt_dev->mdev.id_table = id_table_net;
-		break;
-	case VIRTIO_ID_BLOCK:
-		ifcvf_mgmt_dev->mdev.id_table = id_table_blk;
-		break;
-	default:
-		IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", dev_type);
-		ret = -EOPNOTSUPP;
-		goto err;
-	}
-	ifcvf_mgmt_dev->mdev.ops = &ifcvf_vdpa_mgmt_dev_ops;
-	ifcvf_mgmt_dev->mdev.device = dev;
-	ifcvf_mgmt_dev->pdev = pdev;
 	ret = pcim_enable_device(pdev);
 	if (ret) {
 		IFCVF_ERR(pdev, "Failed to enable device\n");
-		goto err;
+		return ret;
 	}
 	ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4),
 				 IFCVF_DRIVER_NAME);
 	if (ret) {
 		IFCVF_ERR(pdev, "Failed to request MMIO region\n");
-		goto err;
+		return ret;
 	}
 	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
 	if (ret) {
 		IFCVF_ERR(pdev, "No usable DMA configuration\n");
-		goto err;
+		return ret;
 	}
 	ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev);
 	if (ret) {
 		IFCVF_ERR(pdev,
			  "Failed for adding devres for freeing irq vectors\n");
-		goto err;
+		return ret;
 	}
 	pci_set_master(pdev);
+	adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
+				    dev, &ifc_vdpa_ops, 1, 1, NULL, false);
+	if (IS_ERR(adapter)) {
+		IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
+		return PTR_ERR(adapter);
+	}
+	vf = &adapter->vf;
+	vf->dev_type = get_dev_type(pdev);
+	vf->base = pcim_iomap_table(pdev);
+	adapter->pdev = pdev;
+	adapter->vdpa.dma_dev = &pdev->dev;
+	ret = ifcvf_init_hw(vf, pdev);
+	if (ret) {
+		IFCVF_ERR(pdev, "Failed to init IFCVF hw\n");
+		return ret;
+	}
+	for (i = 0; i < vf->nr_vring; i++)
+		vf->vring[i].irq = -EINVAL;
+	vf->hw_features = ifcvf_get_hw_features(vf);
+	vf->config_size = ifcvf_get_config_size(vf);
+	ifcvf_mgmt_dev = kzalloc(sizeof(struct ifcvf_vdpa_mgmt_dev), GFP_KERNEL);
+	if (!ifcvf_mgmt_dev) {
+		IFCVF_ERR(pdev, "Failed to alloc memory for the vDPA management device\n");
+		return -ENOMEM;
+	}
+	ifcvf_mgmt_dev->mdev.ops = &ifcvf_vdpa_mgmt_dev_ops;
+	ifcvf_mgmt_dev->mdev.device = dev;
+	ifcvf_mgmt_dev->adapter = adapter;
+	dev_type = get_dev_type(pdev);
+	switch (dev_type) {
+	case VIRTIO_ID_NET:
+		ifcvf_mgmt_dev->mdev.id_table = id_table_net;
+		break;
+	case VIRTIO_ID_BLOCK:
+		ifcvf_mgmt_dev->mdev.id_table = id_table_blk;
+		break;
+	default:
+		IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", dev_type);
+		ret = -EOPNOTSUPP;
+		goto err;
+	}
+	ifcvf_mgmt_dev->mdev.max_supported_vqs = vf->nr_vring;
+	ifcvf_mgmt_dev->mdev.supported_features = vf->hw_features;
+	adapter->vdpa.mdev = &ifcvf_mgmt_dev->mdev;
 	ret = vdpa_mgmtdev_register(&ifcvf_mgmt_dev->mdev);
 	if (ret) {
 		IFCVF_ERR(pdev,
......
@@ -70,6 +70,16 @@ struct mlx5_vdpa_wq_ent {
 	struct mlx5_vdpa_dev *mvdev;
 };
+enum {
+	MLX5_VDPA_DATAVQ_GROUP,
+	MLX5_VDPA_CVQ_GROUP,
+	MLX5_VDPA_NUMVQ_GROUPS
+};
+enum {
+	MLX5_VDPA_NUM_AS = MLX5_VDPA_NUMVQ_GROUPS
+};
 struct mlx5_vdpa_dev {
 	struct vdpa_device vdev;
 	struct mlx5_core_dev *mdev;
@@ -85,6 +95,7 @@ struct mlx5_vdpa_dev {
 	struct mlx5_vdpa_mr mr;
 	struct mlx5_control_vq cvq;
 	struct workqueue_struct *wq;
+	unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS];
 };
 int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
......
@@ -164,6 +164,7 @@ struct mlx5_vdpa_net {
 	bool setup;
 	u32 cur_num_vqs;
 	u32 rqt_size;
+	bool nb_registered;
 	struct notifier_block nb;
 	struct vdpa_callback config_cb;
 	struct mlx5_vdpa_wq_ent cvq_ent;
@@ -895,6 +896,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
 	if (err)
 		goto err_cmd;
+	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
 	kfree(in);
 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
@@ -922,6 +924,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
 		return;
 	}
+	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
 	umems_destroy(ndev, mvq);
 }
@@ -1121,6 +1124,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
 	return err;
 }
+static bool is_valid_state_change(int oldstate, int newstate)
+{
+	switch (oldstate) {
+	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
+		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
+	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
+		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
+	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
+	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
+	default:
+		return false;
+	}
+}
 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
 {
 	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
@@ -1130,6 +1147,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
 	void *in;
 	int err;
+	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
+		return 0;
+	if (!is_valid_state_change(mvq->fw_state, state))
+		return -EINVAL;
 	in = kzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
@@ -1440,7 +1463,7 @@ static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
 	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
 	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
-	memset(dmac_c, 0xff, ETH_ALEN);
+	eth_broadcast_addr(dmac_c);
 	ether_addr_copy(dmac_v, mac);
 	MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
 	if (tagged) {
@@ -1992,6 +2015,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
 	struct mlx5_vdpa_virtqueue *mvq;
+	int err;
 	if (!mvdev->actual_features)
 		return;
@@ -2005,8 +2029,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
 	}
 	mvq = &ndev->vqs[idx];
-	if (!ready)
+	if (!ready) {
 		suspend_vq(ndev, mvq);
+	} else {
+		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
+		if (err) {
+			mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
+			ready = false;
+		}
+	}
 	mvq->ready = ready;
 }
@@ -2095,9 +2127,14 @@ static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
 	return PAGE_SIZE;
 }
-static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdpa, u16 idx)
+static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
 {
-	return 0;
+	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+	if (is_ctrl_vq_idx(mvdev, idx))
+		return MLX5_VDPA_CVQ_GROUP;
+	return MLX5_VDPA_DATAVQ_GROUP;
 }
 enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
@@ -2511,6 +2548,15 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
 	up_write(&ndev->reslock);
 }
+static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
+{
+	int i;
+	/* default mapping all groups are mapped to asid 0 */
+	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
+		mvdev->group2asid[i] = 0;
+}
 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
 {
 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
@@ -2529,7 +2575,9 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
 	ndev->mvdev.cvq.completed_desc = 0;
 	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
 	ndev->mvdev.actual_features = 0;
+	init_group_to_asid_map(mvdev);
 	++mvdev->generation;
 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
 		if (mlx5_vdpa_create_mr(mvdev, NULL))
 			mlx5_vdpa_warn(mvdev, "create MR failed\n");
@@ -2567,26 +2615,63 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
 	return mvdev->generation;
 }
-static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
-			     struct vhost_iotlb *iotlb)
+static int set_map_control(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+{
+	u64 start = 0ULL, last = 0ULL - 1;
+	struct vhost_iotlb_map *map;
+	int err = 0;
+	spin_lock(&mvdev->cvq.iommu_lock);
+	vhost_iotlb_reset(mvdev->cvq.iotlb);
+	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
+	     map = vhost_iotlb_itree_next(map, start, last)) {
+		err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start,
+					    map->last, map->addr, map->perm);
+		if (err)
+			goto out;
+	}
+out:
+	spin_unlock(&mvdev->cvq.iommu_lock);
+	return err;
+}
+static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
 {
-	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
-	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
 	bool change_map;
 	int err;
-	down_write(&ndev->reslock);
 	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
 	if (err) {
 		mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
-		goto err;
+		return err;
 	}
 	if (change_map)
 		err = mlx5_vdpa_change_map(mvdev, iotlb);
-err:
+	return err;
+}
+static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
+			     struct vhost_iotlb *iotlb)
+{
+	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+	int err = -EINVAL;
+	down_write(&ndev->reslock);
+	if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
+		err = set_map_data(mvdev, iotlb);
+		if (err)
+			goto out;
+	}
+	if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid)
+		err = set_map_control(mvdev, iotlb);
+out:
 	up_write(&ndev->reslock);
 	return err;
 }
@@ -2733,6 +2818,49 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
 	return err;
 }
+static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
+{
+	struct mlx5_control_vq *cvq;
+	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
+		return;
+	cvq = &mvdev->cvq;
+	cvq->ready = false;
+}
+static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
+{
+	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+	struct mlx5_vdpa_virtqueue *mvq;
+	int i;
+	down_write(&ndev->reslock);
+	mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
+	ndev->nb_registered = false;
+	flush_workqueue(ndev->mvdev.wq);
+	for (i = 0; i < ndev->cur_num_vqs; i++) {
+		mvq = &ndev->vqs[i];
+		suspend_vq(ndev, mvq);
+	}
+	mlx5_vdpa_cvq_suspend(mvdev);
+	up_write(&ndev->reslock);
+	return 0;
+}
+static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
+			       unsigned int asid)
+{
+	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
+		return -EINVAL;
+	mvdev->group2asid[group] = asid;
+	return 0;
+}
 static const struct vdpa_config_ops mlx5_vdpa_ops = {
 	.set_vq_address = mlx5_vdpa_set_vq_address,
 	.set_vq_num = mlx5_vdpa_set_vq_num,
@@ -2762,7 +2890,9 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
 	.set_config = mlx5_vdpa_set_config,
 	.get_generation = mlx5_vdpa_get_generation,
 	.set_map = mlx5_vdpa_set_map,
+	.set_group_asid = mlx5_set_group_asid,
 	.free = mlx5_vdpa_free,
+	.suspend = mlx5_vdpa_suspend,
 };
 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
@@ -2828,6 +2958,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
 		mvq->index = i;
 		mvq->ndev = ndev;
 		mvq->fwqp.fw = true;
+		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
 	}
 	for (; i < ndev->mvdev.max_vqs; i++) {
 		mvq = &ndev->vqs[i];
@@ -2902,13 +3033,21 @@ static int event_handler(struct notifier_block *nb, unsigned long event, void *p
 	switch (eqe->sub_type) {
 	case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
 	case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+		down_read(&ndev->reslock);
+		if (!ndev->nb_registered) {
+			up_read(&ndev->reslock);
+			return NOTIFY_DONE;
+		}
 		wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
-		if (!wqent)
+		if (!wqent) {
+			up_read(&ndev->reslock);
 			return NOTIFY_DONE;
+		}
 		wqent->mvdev = &ndev->mvdev;
 		INIT_WORK(&wqent->work, update_carrier);
 		queue_work(ndev->mvdev.wq, &wqent->work);
+		up_read(&ndev->reslock);
 		ret = NOTIFY_OK;
 		break;
 	default:
@@ -2982,7 +3121,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
 	}
 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
-				 1, 1, name, false);
+				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
 	if (IS_ERR(ndev))
 		return PTR_ERR(ndev);
@@ -3062,6 +3201,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
 	ndev->nb.notifier_call = event_handler;
 	mlx5_notifier_register(mdev, &ndev->nb);
+	ndev->nb_registered = true;
 	mvdev->vdev.mdev = &mgtdev->mgtdev;
 	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
 	if (err)
@@ -3093,7 +3233,10 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
 	struct workqueue_struct *wq;
-	mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
+	if (ndev->nb_registered) {
+		mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
+		ndev->nb_registered = false;
+	}
 	wq = mvdev->wq;
 	mvdev->wq = NULL;
 	destroy_workqueue(wq);
......
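
The group/ASID plumbing above is what lets a vhost-vdpa user give the control virtqueue its own IOTLB: mlx5_vdpa_get_vq_group() reports two groups, mlx5_set_group_asid() records the mapping, and mlx5_vdpa_set_map() then routes each incoming mapping update to the data MR or to the CVQ's software iotlb depending on the ASID. A hedged userspace sketch of that flow, using the vhost-vdpa ioctls from the address-space-id work; the names and argument conventions below are from memory, so treat the sequence as illustrative rather than a documented recipe:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>
#include <linux/vhost_types.h>

/* Illustrative only: put the control vq's group into its own address space. */
static int example_isolate_cvq(int vhost_vdpa_fd, uint16_t cvq_index)
{
	struct vhost_vring_state s = { .index = cvq_index };

	/* Ask which group the CVQ belongs to (mlx5 answers MLX5_VDPA_CVQ_GROUP). */
	if (ioctl(vhost_vdpa_fd, VHOST_VDPA_GET_VRING_GROUP, &s))
		return -1;

	/* Bind that group to ASID 1; the data vq group stays on ASID 0. */
	s.index = s.num;	/* group id returned above */
	s.num = 1;		/* address space id */
	return ioctl(vhost_vdpa_fd, VHOST_VDPA_SET_GROUP_ASID, &s);
}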
@@ -824,11 +824,11 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms
 			    config.mac))
 		return -EMSGSIZE;
-	val_u16 = le16_to_cpu(config.status);
+	val_u16 = __virtio16_to_cpu(true, config.status);
 	if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16))
 		return -EMSGSIZE;
-	val_u16 = le16_to_cpu(config.mtu);
+	val_u16 = __virtio16_to_cpu(true, config.mtu);
 	if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16))
 		return -EMSGSIZE;
@@ -846,17 +846,9 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid,
 {
 	u32 device_id;
 	void *hdr;
-	u8 status;
 	int err;
 	down_read(&vdev->cf_lock);
-	status = vdev->config->get_status(vdev);
-	if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
-		NL_SET_ERR_MSG_MOD(extack, "Features negotiation not completed");
-		err = -EAGAIN;
-		goto out;
-	}
 	hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
 			  VDPA_CMD_DEV_CONFIG_GET);
 	if (!hdr) {
@@ -913,7 +905,7 @@ static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg,
 	}
 	vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config));
-	max_vqp = le16_to_cpu(config.max_virtqueue_pairs);
+	max_vqp = __virtio16_to_cpu(true, config.max_virtqueue_pairs);
 	if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, max_vqp))
 		return -EMSGSIZE;
......
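
Two things change in the netlink config path above: the FEATURES_OK gate is dropped (per the shortlog, "!FEATURES_OK should not block querying device config space"), and the virtio-net config fields are converted with the virtio helper instead of le16_to_cpu(). The helper choice is purely a types/endianness point; a small illustration, assuming a struct virtio_net_config read out of a modern (VERSION_1) vdpa device:

#include <linux/virtio_config.h>
#include <uapi/linux/virtio_net.h>

/* Illustration only: why the code above uses __virtio16_to_cpu().  The
 * virtio_net_config fields are __virtio16, and a vdpa device is always a
 * modern (VERSION_1) device, i.e. little-endian config space, so the
 * conversion passes little_endian = true.  Feeding a __virtio16 to
 * le16_to_cpu() is what produced the sparse "cast to restricted le16"
 * warnings mentioned in the shortlog. */
static u16 example_net_status(const struct virtio_net_config *config)
{
	return __virtio16_to_cpu(true /* little_endian */, config->status);
}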
@@ -33,7 +33,7 @@ MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable");
 static int max_iotlb_entries = 2048;
 module_param(max_iotlb_entries, int, 0444);
 MODULE_PARM_DESC(max_iotlb_entries,
-		 "Maximum number of iotlb entries. 0 means unlimited. (default: 2048)");
+		 "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
 #define VDPASIM_QUEUE_ALIGN PAGE_SIZE
 #define VDPASIM_QUEUE_MAX 256
@@ -107,6 +107,7 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim)
 	for (i = 0; i < vdpasim->dev_attr.nas; i++)
 		vhost_iotlb_reset(&vdpasim->iommu[i]);
+	vdpasim->running = true;
 	spin_unlock(&vdpasim->iommu_lock);
 	vdpasim->features = 0;
@@ -291,7 +292,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
 		goto err_iommu;
 	for (i = 0; i < vdpasim->dev_attr.nas; i++)
-		vhost_iotlb_init(&vdpasim->iommu[i], 0, 0);
+		vhost_iotlb_init(&vdpasim->iommu[i], max_iotlb_entries, 0);
 	vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL);
 	if (!vdpasim->buffer)
@@ -505,6 +506,17 @@ static int vdpasim_reset(struct vdpa_device *vdpa)
 	return 0;
 }
+static int vdpasim_suspend(struct vdpa_device *vdpa)
+{
+	struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+	spin_lock(&vdpasim->lock);
+	vdpasim->running = false;
+	spin_unlock(&vdpasim->lock);
+	return 0;
+}
 static size_t vdpasim_get_config_size(struct vdpa_device *vdpa)
 {
 	struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
@@ -694,6 +706,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
 	.get_status = vdpasim_get_status,
 	.set_status = vdpasim_set_status,
 	.reset = vdpasim_reset,
+	.suspend = vdpasim_suspend,
 	.get_config_size = vdpasim_get_config_size,
 	.get_config = vdpasim_get_config,
 	.set_config = vdpasim_set_config,
@@ -726,6 +739,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
 	.get_status = vdpasim_get_status,
 	.set_status = vdpasim_set_status,
 	.reset = vdpasim_reset,
+	.suspend = vdpasim_suspend,
 	.get_config_size = vdpasim_get_config_size,
 	.get_config = vdpasim_get_config,
 	.set_config = vdpasim_set_config,
......
@@ -66,6 +66,7 @@ struct vdpasim {
 	u32 generation;
 	u64 features;
 	u32 groups;
+	bool running;
 	/* spinlock to synchronize iommu table */
 	spinlock_t iommu_lock;
 };
......
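
The simulator's suspend implementation above is deliberately minimal: it only clears `running`, and the worker (see the vdpa_sim_net hunk below) refuses to touch the rings while `running` is false. From userspace the same operation is reached through the vhost-vdpa SUSPEND uAPI this series adds. A hedged sketch of the expected sequence, using the feature bit and ioctl names the series introduces (treat the exact flow as illustrative):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>
#include <linux/vhost_types.h>

/* Illustrative only: suspend a vhost-vdpa device if the backend offers it. */
static int example_suspend(int vhost_vdpa_fd)
{
	uint64_t features;

	if (ioctl(vhost_vdpa_fd, VHOST_GET_BACKEND_FEATURES, &features))
		return -1;

	/* The SUSPEND backend feature bit is what this series introduces. */
	if (!(features & (1ULL << VHOST_BACKEND_F_SUSPEND)))
		return -1;	/* parent driver has no .suspend op */

	/* Device stops processing virtqueues but keeps its state. */
	return ioctl(vhost_vdpa_fd, VHOST_VDPA_SUSPEND);
}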
This diff is collapsed.
@@ -154,6 +154,9 @@ static void vdpasim_net_work(struct work_struct *work)
 	spin_lock(&vdpasim->lock);
+	if (!vdpasim->running)
+		goto out;
 	if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
 		goto out;
......
@@ -138,18 +138,17 @@ static void do_bounce(phys_addr_t orig, void *addr, size_t size,
 {
 	unsigned long pfn = PFN_DOWN(orig);
 	unsigned int offset = offset_in_page(orig);
-	char *buffer;
+	struct page *page;
 	unsigned int sz = 0;
 	while (size) {
 		sz = min_t(size_t, PAGE_SIZE - offset, size);
-		buffer = kmap_atomic(pfn_to_page(pfn));
+		page = pfn_to_page(pfn);
 		if (dir == DMA_TO_DEVICE)
-			memcpy(addr, buffer + offset, sz);
+			memcpy_from_page(addr, page, offset, sz);
 		else
-			memcpy(buffer + offset, addr, sz);
-		kunmap_atomic(buffer);
+			memcpy_to_page(page, offset, addr, sz);
 		size -= sz;
 		pfn++;
@@ -179,8 +178,9 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain,
 			 map->orig_phys == INVALID_PHYS_ADDR))
 			return;
-		addr = page_address(map->bounce_page) + offset;
-		do_bounce(map->orig_phys + offset, addr, sz, dir);
+		addr = kmap_local_page(map->bounce_page);
+		do_bounce(map->orig_phys + offset, addr + offset, sz, dir);
+		kunmap_local(addr);
 		size -= sz;
 		iova += sz;
 	}
@@ -213,21 +213,21 @@ vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
 	struct vduse_bounce_map *map;
 	struct page *page = NULL;
-	spin_lock(&domain->iotlb_lock);
+	read_lock(&domain->bounce_lock);
 	map = &domain->bounce_maps[iova >> PAGE_SHIFT];
-	if (!map->bounce_page)
+	if (domain->user_bounce_pages || !map->bounce_page)
 		goto out;
 	page = map->bounce_page;
 	get_page(page);
 out:
-	spin_unlock(&domain->iotlb_lock);
+	read_unlock(&domain->bounce_lock);
 	return page;
 }
 static void
-vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain)
+vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
 {
 	struct vduse_bounce_map *map;
 	unsigned long pfn, bounce_pfns;
@@ -247,6 +247,73 @@ vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain)
 	}
 }
+int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
+				       struct page **pages, int count)
+{
+	struct vduse_bounce_map *map;
+	int i, ret;
+	/* Now we don't support partial mapping */
+	if (count != (domain->bounce_size >> PAGE_SHIFT))
+		return -EINVAL;
+	write_lock(&domain->bounce_lock);
+	ret = -EEXIST;
+	if (domain->user_bounce_pages)
+		goto out;
+	for (i = 0; i < count; i++) {
+		map = &domain->bounce_maps[i];
+		if (map->bounce_page) {
+			/* Copy kernel page to user page if it's in use */
+			if (map->orig_phys != INVALID_PHYS_ADDR)
+				memcpy_to_page(pages[i], 0,
+					       page_address(map->bounce_page),
+					       PAGE_SIZE);
+			__free_page(map->bounce_page);
+		}
+		map->bounce_page = pages[i];
+		get_page(pages[i]);
+	}
+	domain->user_bounce_pages = true;
+	ret = 0;
+out:
+	write_unlock(&domain->bounce_lock);
+	return ret;
+}
+void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
+{
+	struct vduse_bounce_map *map;
+	unsigned long i, count;
+	write_lock(&domain->bounce_lock);
+	if (!domain->user_bounce_pages)
+		goto out;
+	count = domain->bounce_size >> PAGE_SHIFT;
+	for (i = 0; i < count; i++) {
+		struct page *page = NULL;
+		map = &domain->bounce_maps[i];
+		if (WARN_ON(!map->bounce_page))
+			continue;
+		/* Copy user page to kernel page if it's in use */
+		if (map->orig_phys != INVALID_PHYS_ADDR) {
+			page = alloc_page(GFP_ATOMIC | __GFP_NOFAIL);
+			memcpy_from_page(page_address(page),
+					 map->bounce_page, 0, PAGE_SIZE);
+		}
+		put_page(map->bounce_page);
+		map->bounce_page = page;
+	}
+	domain->user_bounce_pages = false;
+out:
+	write_unlock(&domain->bounce_lock);
+}
 void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
 {
 	if (!domain->bounce_map)
@@ -322,13 +389,18 @@ dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
 	if (vduse_domain_init_bounce_map(domain))
 		goto err;
+	read_lock(&domain->bounce_lock);
 	if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
-		goto err;
+		goto err_unlock;
 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
 		vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);
+	read_unlock(&domain->bounce_lock);
 	return iova;
+err_unlock:
+	read_unlock(&domain->bounce_lock);
 err:
 	vduse_domain_free_iova(iovad, iova, size);
 	return DMA_MAPPING_ERROR;
@@ -340,10 +412,12 @@ void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
 {
 	struct iova_domain *iovad = &domain->stream_iovad;
+	read_lock(&domain->bounce_lock);
 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
 		vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);
 	vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
+	read_unlock(&domain->bounce_lock);
 	vduse_domain_free_iova(iovad, dma_addr, size);
 }
@@ -451,7 +525,8 @@ static int vduse_domain_release(struct inode *inode, struct file *file)
 	spin_lock(&domain->iotlb_lock);
 	vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
-	vduse_domain_free_bounce_pages(domain);
+	vduse_domain_remove_user_bounce_pages(domain);
+	vduse_domain_free_kernel_bounce_pages(domain);
 	spin_unlock(&domain->iotlb_lock);
 	put_iova_domain(&domain->stream_iovad);
 	put_iova_domain(&domain->consistent_iovad);
@@ -511,6 +586,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
 		goto err_file;
 	domain->file = file;
+	rwlock_init(&domain->bounce_lock);
 	spin_lock_init(&domain->iotlb_lock);
 	init_iova_domain(&domain->stream_iovad,
 			 PAGE_SIZE, IOVA_START_PFN);
......
@@ -14,6 +14,7 @@
 #include <linux/iova.h>
 #include <linux/dma-mapping.h>
 #include <linux/vhost_iotlb.h>
+#include <linux/rwlock.h>
 #define IOVA_START_PFN 1
@@ -34,6 +35,8 @@ struct vduse_iova_domain {
 	struct vhost_iotlb *iotlb;
 	spinlock_t iotlb_lock;
 	struct file *file;
+	bool user_bounce_pages;
+	rwlock_t bounce_lock;
 };
 int vduse_domain_set_map(struct vduse_iova_domain *domain,
@@ -61,6 +64,11 @@ void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
 void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain);
+int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
+				       struct page **pages, int count);
+void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain);
 void vduse_domain_destroy(struct vduse_iova_domain *domain);
 struct vduse_iova_domain *vduse_domain_create(unsigned long iova_limit,
......
...@@ -21,6 +21,8 @@ ...@@ -21,6 +21,8 @@
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/vdpa.h> #include <linux/vdpa.h>
#include <linux/nospec.h> #include <linux/nospec.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <uapi/linux/vduse.h> #include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h> #include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h> #include <uapi/linux/virtio_config.h>
...@@ -64,6 +66,13 @@ struct vduse_vdpa { ...@@ -64,6 +66,13 @@ struct vduse_vdpa {
struct vduse_dev *dev; struct vduse_dev *dev;
}; };
struct vduse_umem {
unsigned long iova;
unsigned long npages;
struct page **pages;
struct mm_struct *mm;
};
struct vduse_dev { struct vduse_dev {
struct vduse_vdpa *vdev; struct vduse_vdpa *vdev;
struct device *dev; struct device *dev;
...@@ -95,6 +104,8 @@ struct vduse_dev { ...@@ -95,6 +104,8 @@ struct vduse_dev {
u8 status; u8 status;
u32 vq_num; u32 vq_num;
u32 vq_align; u32 vq_align;
struct vduse_umem *umem;
struct mutex mem_lock;
}; };
struct vduse_dev_msg { struct vduse_dev_msg {
...@@ -917,6 +928,102 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev, ...@@ -917,6 +928,102 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
return ret; return ret;
} }
static int vduse_dev_dereg_umem(struct vduse_dev *dev,
u64 iova, u64 size)
{
int ret;
mutex_lock(&dev->mem_lock);
ret = -ENOENT;
if (!dev->umem)
goto unlock;
ret = -EINVAL;
if (dev->umem->iova != iova || size != dev->domain->bounce_size)
goto unlock;
vduse_domain_remove_user_bounce_pages(dev->domain);
unpin_user_pages_dirty_lock(dev->umem->pages,
dev->umem->npages, true);
atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
mmdrop(dev->umem->mm);
vfree(dev->umem->pages);
kfree(dev->umem);
dev->umem = NULL;
ret = 0;
unlock:
mutex_unlock(&dev->mem_lock);
return ret;
}
static int vduse_dev_reg_umem(struct vduse_dev *dev,
u64 iova, u64 uaddr, u64 size)
{
struct page **page_list = NULL;
struct vduse_umem *umem = NULL;
long pinned = 0;
unsigned long npages, lock_limit;
int ret;
if (!dev->domain->bounce_map ||
size != dev->domain->bounce_size ||
iova != 0 || uaddr & ~PAGE_MASK)
return -EINVAL;
mutex_lock(&dev->mem_lock);
ret = -EEXIST;
if (dev->umem)
goto unlock;
ret = -ENOMEM;
npages = size >> PAGE_SHIFT;
page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
GFP_KERNEL_ACCOUNT);
umem = kzalloc(sizeof(*umem), GFP_KERNEL);
if (!page_list || !umem)
goto unlock;
mmap_read_lock(current->mm);
lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
goto out;
pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
page_list, NULL);
if (pinned != npages) {
ret = pinned < 0 ? pinned : -ENOMEM;
goto out;
}
ret = vduse_domain_add_user_bounce_pages(dev->domain,
page_list, pinned);
if (ret)
goto out;
atomic64_add(npages, &current->mm->pinned_vm);
umem->pages = page_list;
umem->npages = pinned;
umem->iova = iova;
umem->mm = current->mm;
mmgrab(current->mm);
dev->umem = umem;
out:
if (ret && pinned > 0)
unpin_user_pages(page_list, pinned);
mmap_read_unlock(current->mm);
unlock:
if (ret) {
vfree(page_list);
kfree(umem);
}
mutex_unlock(&dev->mem_lock);
return ret;
}
static long vduse_dev_ioctl(struct file *file, unsigned int cmd, static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg) unsigned long arg)
{ {
...@@ -1089,6 +1196,77 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, ...@@ -1089,6 +1196,77 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject); ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
break; break;
} }
case VDUSE_IOTLB_REG_UMEM: {
struct vduse_iova_umem umem;
ret = -EFAULT;
if (copy_from_user(&umem, argp, sizeof(umem)))
break;
ret = -EINVAL;
if (!is_mem_zero((const char *)umem.reserved,
sizeof(umem.reserved)))
break;
ret = vduse_dev_reg_umem(dev, umem.iova,
umem.uaddr, umem.size);
break;
}
case VDUSE_IOTLB_DEREG_UMEM: {
struct vduse_iova_umem umem;
ret = -EFAULT;
if (copy_from_user(&umem, argp, sizeof(umem)))
break;
ret = -EINVAL;
if (!is_mem_zero((const char *)umem.reserved,
sizeof(umem.reserved)))
break;
ret = vduse_dev_dereg_umem(dev, umem.iova,
umem.size);
break;
}
case VDUSE_IOTLB_GET_INFO: {
struct vduse_iova_info info;
struct vhost_iotlb_map *map;
struct vduse_iova_domain *domain = dev->domain;
ret = -EFAULT;
if (copy_from_user(&info, argp, sizeof(info)))
break;
ret = -EINVAL;
if (info.start > info.last)
break;
if (!is_mem_zero((const char *)info.reserved,
sizeof(info.reserved)))
break;
spin_lock(&domain->iotlb_lock);
map = vhost_iotlb_itree_first(domain->iotlb,
info.start, info.last);
if (map) {
info.start = map->start;
info.last = map->last;
info.capability = 0;
if (domain->bounce_map && map->start == 0 &&
map->last == domain->bounce_size - 1)
info.capability |= VDUSE_IOVA_CAP_UMEM;
}
spin_unlock(&domain->iotlb_lock);
if (!map)
break;
ret = -EFAULT;
if (copy_to_user(argp, &info, sizeof(info)))
break;
ret = 0;
break;
}
default: default:
ret = -ENOIOCTLCMD; ret = -ENOIOCTLCMD;
break; break;
...@@ -1101,6 +1279,7 @@ static int vduse_dev_release(struct inode *inode, struct file *file) ...@@ -1101,6 +1279,7 @@ static int vduse_dev_release(struct inode *inode, struct file *file)
{ {
struct vduse_dev *dev = file->private_data; struct vduse_dev *dev = file->private_data;
vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
spin_lock(&dev->msg_lock); spin_lock(&dev->msg_lock);
/* Make sure the inflight messages can be processed after reconnection */ /* Make sure the inflight messages can be processed after reconnection */
list_splice_init(&dev->recv_list, &dev->send_list); list_splice_init(&dev->recv_list, &dev->send_list);
...@@ -1163,6 +1342,7 @@ static struct vduse_dev *vduse_dev_create(void) ...@@ -1163,6 +1342,7 @@ static struct vduse_dev *vduse_dev_create(void)
return NULL; return NULL;
mutex_init(&dev->lock); mutex_init(&dev->lock);
mutex_init(&dev->mem_lock);
spin_lock_init(&dev->msg_lock); spin_lock_init(&dev->msg_lock);
INIT_LIST_HEAD(&dev->send_list); INIT_LIST_HEAD(&dev->send_list);
INIT_LIST_HEAD(&dev->recv_list); INIT_LIST_HEAD(&dev->recv_list);
......
...@@ -159,9 +159,13 @@ enum { ...@@ -159,9 +159,13 @@ enum {
}; };
#define VHOST_SCSI_MAX_TARGET 256 #define VHOST_SCSI_MAX_TARGET 256
#define VHOST_SCSI_MAX_VQ 128 #define VHOST_SCSI_MAX_IO_VQ 1024
#define VHOST_SCSI_MAX_EVENT 128 #define VHOST_SCSI_MAX_EVENT 128
static unsigned vhost_scsi_max_io_vqs = 128;
module_param_named(max_io_vqs, vhost_scsi_max_io_vqs, uint, 0644);
MODULE_PARM_DESC(max_io_vqs, "Set the max number of IO virtqueues a vhost scsi device can support. The default is 128. The max is 1024.");
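For example (illustrative, not from the patch itself): loading the module with "modprobe vhost_scsi max_io_vqs=16" would give each subsequently opened vhost-scsi device 16 I/O virtqueues in addition to the control and event queues; as the open path further down shows, a value above 1024 is clamped to VHOST_SCSI_MAX_IO_VQ and 0 falls back to 1.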
struct vhost_scsi_virtqueue { struct vhost_scsi_virtqueue {
struct vhost_virtqueue vq; struct vhost_virtqueue vq;
/* /*
...@@ -186,7 +190,9 @@ struct vhost_scsi { ...@@ -186,7 +190,9 @@ struct vhost_scsi {
char vs_vhost_wwpn[TRANSPORT_IQN_LEN]; char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
struct vhost_dev dev; struct vhost_dev dev;
struct vhost_scsi_virtqueue vqs[VHOST_SCSI_MAX_VQ]; struct vhost_scsi_virtqueue *vqs;
unsigned long *compl_bitmap;
struct vhost_scsi_inflight **old_inflight;
struct vhost_work vs_completion_work; /* cmd completion work item */ struct vhost_work vs_completion_work; /* cmd completion work item */
struct llist_head vs_completion_list; /* cmd completion queue */ struct llist_head vs_completion_list; /* cmd completion queue */
...@@ -245,7 +251,7 @@ static void vhost_scsi_init_inflight(struct vhost_scsi *vs, ...@@ -245,7 +251,7 @@ static void vhost_scsi_init_inflight(struct vhost_scsi *vs,
struct vhost_virtqueue *vq; struct vhost_virtqueue *vq;
int idx, i; int idx, i;
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq; vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
...@@ -533,7 +539,6 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) ...@@ -533,7 +539,6 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
{ {
struct vhost_scsi *vs = container_of(work, struct vhost_scsi, struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
vs_completion_work); vs_completion_work);
DECLARE_BITMAP(signal, VHOST_SCSI_MAX_VQ);
struct virtio_scsi_cmd_resp v_rsp; struct virtio_scsi_cmd_resp v_rsp;
struct vhost_scsi_cmd *cmd, *t; struct vhost_scsi_cmd *cmd, *t;
struct llist_node *llnode; struct llist_node *llnode;
...@@ -541,7 +546,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) ...@@ -541,7 +546,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
struct iov_iter iov_iter; struct iov_iter iov_iter;
int ret, vq; int ret, vq;
bitmap_zero(signal, VHOST_SCSI_MAX_VQ); bitmap_zero(vs->compl_bitmap, vs->dev.nvqs);
llnode = llist_del_all(&vs->vs_completion_list); llnode = llist_del_all(&vs->vs_completion_list);
llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) { llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) {
se_cmd = &cmd->tvc_se_cmd; se_cmd = &cmd->tvc_se_cmd;
...@@ -566,7 +571,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) ...@@ -566,7 +571,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0); vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0);
q = container_of(cmd->tvc_vq, struct vhost_scsi_virtqueue, vq); q = container_of(cmd->tvc_vq, struct vhost_scsi_virtqueue, vq);
vq = q - vs->vqs; vq = q - vs->vqs;
__set_bit(vq, signal); __set_bit(vq, vs->compl_bitmap);
} else } else
pr_err("Faulted on virtio_scsi_cmd_resp\n"); pr_err("Faulted on virtio_scsi_cmd_resp\n");
...@@ -574,8 +579,8 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) ...@@ -574,8 +579,8 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
} }
vq = -1; vq = -1;
while ((vq = find_next_bit(signal, VHOST_SCSI_MAX_VQ, vq + 1)) while ((vq = find_next_bit(vs->compl_bitmap, vs->dev.nvqs, vq + 1))
< VHOST_SCSI_MAX_VQ) < vs->dev.nvqs)
vhost_signal(&vs->dev, &vs->vqs[vq].vq); vhost_signal(&vs->dev, &vs->vqs[vq].vq);
} }
...@@ -1419,26 +1424,25 @@ static void vhost_scsi_handle_kick(struct vhost_work *work) ...@@ -1419,26 +1424,25 @@ static void vhost_scsi_handle_kick(struct vhost_work *work)
/* Callers must hold dev mutex */ /* Callers must hold dev mutex */
static void vhost_scsi_flush(struct vhost_scsi *vs) static void vhost_scsi_flush(struct vhost_scsi *vs)
{ {
struct vhost_scsi_inflight *old_inflight[VHOST_SCSI_MAX_VQ];
int i; int i;
/* Init new inflight and remember the old inflight */ /* Init new inflight and remember the old inflight */
vhost_scsi_init_inflight(vs, old_inflight); vhost_scsi_init_inflight(vs, vs->old_inflight);
/* /*
* The inflight->kref was initialized to 1. We decrement it here to * The inflight->kref was initialized to 1. We decrement it here to
* indicate the start of the flush operation so that it will reach 0 * indicate the start of the flush operation so that it will reach 0
* when all the reqs are finished. * when all the reqs are finished.
*/ */
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) for (i = 0; i < vs->dev.nvqs; i++)
kref_put(&old_inflight[i]->kref, vhost_scsi_done_inflight); kref_put(&vs->old_inflight[i]->kref, vhost_scsi_done_inflight);
/* Flush both the vhost poll and vhost work */ /* Flush both the vhost poll and vhost work */
vhost_dev_flush(&vs->dev); vhost_dev_flush(&vs->dev);
/* Wait for all reqs issued before the flush to be finished */ /* Wait for all reqs issued before the flush to be finished */
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) for (i = 0; i < vs->dev.nvqs; i++)
wait_for_completion(&old_inflight[i]->comp); wait_for_completion(&vs->old_inflight[i]->comp);
} }
static void vhost_scsi_destroy_vq_cmds(struct vhost_virtqueue *vq) static void vhost_scsi_destroy_vq_cmds(struct vhost_virtqueue *vq)
...@@ -1601,7 +1605,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, ...@@ -1601,7 +1605,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn, memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn,
sizeof(vs->vs_vhost_wwpn)); sizeof(vs->vs_vhost_wwpn));
for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) { for (i = VHOST_SCSI_VQ_IO; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq; vq = &vs->vqs[i].vq;
if (!vhost_vq_is_setup(vq)) if (!vhost_vq_is_setup(vq))
continue; continue;
...@@ -1611,7 +1615,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, ...@@ -1611,7 +1615,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
goto destroy_vq_cmds; goto destroy_vq_cmds;
} }
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq; vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
vhost_vq_set_backend(vq, vs_tpg); vhost_vq_set_backend(vq, vs_tpg);
...@@ -1713,7 +1717,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, ...@@ -1713,7 +1717,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
target_undepend_item(&se_tpg->tpg_group.cg_item); target_undepend_item(&se_tpg->tpg_group.cg_item);
} }
if (match) { if (match) {
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq; vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
vhost_vq_set_backend(vq, NULL); vhost_vq_set_backend(vq, NULL);
...@@ -1722,7 +1726,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, ...@@ -1722,7 +1726,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
/* Make sure cmds are not running before tearing them down. */ /* Make sure cmds are not running before tearing them down. */
vhost_scsi_flush(vs); vhost_scsi_flush(vs);
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq; vq = &vs->vqs[i].vq;
vhost_scsi_destroy_vq_cmds(vq); vhost_scsi_destroy_vq_cmds(vq);
} }
...@@ -1762,7 +1766,7 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features) ...@@ -1762,7 +1766,7 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
return -EFAULT; return -EFAULT;
} }
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq; vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
vq->acked_features = features; vq->acked_features = features;
...@@ -1776,16 +1780,40 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) ...@@ -1776,16 +1780,40 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
{ {
struct vhost_scsi *vs; struct vhost_scsi *vs;
struct vhost_virtqueue **vqs; struct vhost_virtqueue **vqs;
int r = -ENOMEM, i; int r = -ENOMEM, i, nvqs = vhost_scsi_max_io_vqs;
vs = kvzalloc(sizeof(*vs), GFP_KERNEL); vs = kvzalloc(sizeof(*vs), GFP_KERNEL);
if (!vs) if (!vs)
goto err_vs; goto err_vs;
vqs = kmalloc_array(VHOST_SCSI_MAX_VQ, sizeof(*vqs), GFP_KERNEL); if (nvqs > VHOST_SCSI_MAX_IO_VQ) {
if (!vqs) pr_err("Invalid max_io_vqs of %d. Using %d.\n", nvqs,
VHOST_SCSI_MAX_IO_VQ);
nvqs = VHOST_SCSI_MAX_IO_VQ;
} else if (nvqs == 0) {
pr_err("Invalid max_io_vqs of %d. Using 1.\n", nvqs);
nvqs = 1;
}
nvqs += VHOST_SCSI_VQ_IO;
vs->compl_bitmap = bitmap_alloc(nvqs, GFP_KERNEL);
if (!vs->compl_bitmap)
goto err_compl_bitmap;
vs->old_inflight = kmalloc_array(nvqs, sizeof(*vs->old_inflight),
GFP_KERNEL | __GFP_ZERO);
if (!vs->old_inflight)
goto err_inflight;
vs->vqs = kmalloc_array(nvqs, sizeof(*vs->vqs),
GFP_KERNEL | __GFP_ZERO);
if (!vs->vqs)
goto err_vqs; goto err_vqs;
vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
if (!vqs)
goto err_local_vqs;
vhost_work_init(&vs->vs_completion_work, vhost_scsi_complete_cmd_work); vhost_work_init(&vs->vs_completion_work, vhost_scsi_complete_cmd_work);
vhost_work_init(&vs->vs_event_work, vhost_scsi_evt_work); vhost_work_init(&vs->vs_event_work, vhost_scsi_evt_work);
...@@ -1796,11 +1824,11 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) ...@@ -1796,11 +1824,11 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
vqs[VHOST_SCSI_VQ_EVT] = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; vqs[VHOST_SCSI_VQ_EVT] = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
vs->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick; vs->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick;
vs->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick; vs->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick;
for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) { for (i = VHOST_SCSI_VQ_IO; i < nvqs; i++) {
vqs[i] = &vs->vqs[i].vq; vqs[i] = &vs->vqs[i].vq;
vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
} }
vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV, vhost_dev_init(&vs->dev, vqs, nvqs, UIO_MAXIOV,
VHOST_SCSI_WEIGHT, 0, true, NULL); VHOST_SCSI_WEIGHT, 0, true, NULL);
vhost_scsi_init_inflight(vs, NULL); vhost_scsi_init_inflight(vs, NULL);
...@@ -1808,7 +1836,13 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) ...@@ -1808,7 +1836,13 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
f->private_data = vs; f->private_data = vs;
return 0; return 0;
err_local_vqs:
kfree(vs->vqs);
err_vqs: err_vqs:
kfree(vs->old_inflight);
err_inflight:
bitmap_free(vs->compl_bitmap);
err_compl_bitmap:
kvfree(vs); kvfree(vs);
err_vs: err_vs:
return r; return r;
...@@ -1826,6 +1860,9 @@ static int vhost_scsi_release(struct inode *inode, struct file *f) ...@@ -1826,6 +1860,9 @@ static int vhost_scsi_release(struct inode *inode, struct file *f)
vhost_dev_stop(&vs->dev); vhost_dev_stop(&vs->dev);
vhost_dev_cleanup(&vs->dev); vhost_dev_cleanup(&vs->dev);
kfree(vs->dev.vqs); kfree(vs->dev.vqs);
kfree(vs->vqs);
kfree(vs->old_inflight);
bitmap_free(vs->compl_bitmap);
kvfree(vs); kvfree(vs);
return 0; return 0;
} }
......
...@@ -347,6 +347,14 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, ...@@ -347,6 +347,14 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v,
return 0; return 0;
} }
static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
return ops->suspend;
}
static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{ {
struct vdpa_device *vdpa = v->vdpa; struct vdpa_device *vdpa = v->vdpa;
...@@ -470,6 +478,22 @@ static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp) ...@@ -470,6 +478,22 @@ static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
return 0; return 0;
} }
/* After a successful return of this ioctl the device must not process more
 * virtqueue descriptors. The device can still answer reads and writes of
 * config fields as if it were not suspended. In particular, writing to
 * "queue_enable" with a value of 1 will not make the device start processing
 * buffers.
 */
static long vhost_vdpa_suspend(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
if (!ops->suspend)
return -EOPNOTSUPP;
return ops->suspend(vdpa);
}
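A minimal user-space sketch of driving the new ioctl (not part of this patch; it assumes an already-open vhost-vdpa fd, keeps error handling minimal, and negotiates the SUSPEND backend feature first, as a well-behaved client would):
#include <sys/ioctl.h>
#include <linux/vhost.h>
static int suspend_vdpa(int fd)
{
	__u64 features;
	if (ioctl(fd, VHOST_GET_BACKEND_FEATURES, &features))
		return -1;
	/* Only ask for suspend if the backend advertises it. */
	if (!(features & (1ULL << VHOST_BACKEND_F_SUSPEND)))
		return -1;
	if (ioctl(fd, VHOST_SET_BACKEND_FEATURES, &features))
		return -1;
	/* On success the device stops processing virtqueue descriptors. */
	return ioctl(fd, VHOST_VDPA_SUSPEND);
}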
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
void __user *argp) void __user *argp)
{ {
...@@ -577,7 +601,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, ...@@ -577,7 +601,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
if (cmd == VHOST_SET_BACKEND_FEATURES) { if (cmd == VHOST_SET_BACKEND_FEATURES) {
if (copy_from_user(&features, featurep, sizeof(features))) if (copy_from_user(&features, featurep, sizeof(features)))
return -EFAULT; return -EFAULT;
if (features & ~VHOST_VDPA_BACKEND_FEATURES) if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
BIT_ULL(VHOST_BACKEND_F_SUSPEND)))
return -EOPNOTSUPP;
if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
!vhost_vdpa_can_suspend(v))
return -EOPNOTSUPP; return -EOPNOTSUPP;
vhost_set_backend_features(&v->vdev, features); vhost_set_backend_features(&v->vdev, features);
return 0; return 0;
...@@ -628,6 +656,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, ...@@ -628,6 +656,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
break; break;
case VHOST_GET_BACKEND_FEATURES: case VHOST_GET_BACKEND_FEATURES:
features = VHOST_VDPA_BACKEND_FEATURES; features = VHOST_VDPA_BACKEND_FEATURES;
if (vhost_vdpa_can_suspend(v))
features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
if (copy_to_user(featurep, &features, sizeof(features))) if (copy_to_user(featurep, &features, sizeof(features)))
r = -EFAULT; r = -EFAULT;
break; break;
...@@ -640,6 +670,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, ...@@ -640,6 +670,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
case VHOST_VDPA_GET_VQS_COUNT: case VHOST_VDPA_GET_VQS_COUNT:
r = vhost_vdpa_get_vqs_count(v, argp); r = vhost_vdpa_get_vqs_count(v, argp);
break; break;
case VHOST_VDPA_SUSPEND:
r = vhost_vdpa_suspend(v);
break;
default: default:
r = vhost_dev_ioctl(&v->vdev, cmd, argp); r = vhost_dev_ioctl(&v->vdev, cmd, argp);
if (r == -ENOIOCTLCMD) if (r == -ENOIOCTLCMD)
...@@ -1076,7 +1109,7 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v) ...@@ -1076,7 +1109,7 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
if (!bus) if (!bus)
return -EFAULT; return -EFAULT;
if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY)) if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY))
return -ENOTSUPP; return -ENOTSUPP;
v->domain = iommu_domain_alloc(bus); v->domain = iommu_domain_alloc(bus);
...@@ -1363,6 +1396,7 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) ...@@ -1363,6 +1396,7 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
err: err:
put_device(&v->dev); put_device(&v->dev);
ida_simple_remove(&vhost_vdpa_ida, v->minor);
return r; return r;
} }
......
...@@ -1095,7 +1095,8 @@ EXPORT_SYMBOL(vringh_need_notify_kern); ...@@ -1095,7 +1095,8 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
#if IS_REACHABLE(CONFIG_VHOST_IOTLB) #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
static int iotlb_translate(const struct vringh *vrh, static int iotlb_translate(const struct vringh *vrh,
u64 addr, u64 len, struct bio_vec iov[], u64 addr, u64 len, u64 *translated,
struct bio_vec iov[],
int iov_size, u32 perm) int iov_size, u32 perm)
{ {
struct vhost_iotlb_map *map; struct vhost_iotlb_map *map;
...@@ -1136,43 +1137,76 @@ static int iotlb_translate(const struct vringh *vrh, ...@@ -1136,43 +1137,76 @@ static int iotlb_translate(const struct vringh *vrh,
spin_unlock(vrh->iotlb_lock); spin_unlock(vrh->iotlb_lock);
if (translated)
*translated = min(len, s);
return ret; return ret;
} }
static inline int copy_from_iotlb(const struct vringh *vrh, void *dst, static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
void *src, size_t len) void *src, size_t len)
{ {
struct iov_iter iter; u64 total_translated = 0;
struct bio_vec iov[16];
int ret;
ret = iotlb_translate(vrh, (u64)(uintptr_t)src, while (total_translated < len) {
len, iov, 16, VHOST_MAP_RO); struct bio_vec iov[16];
if (ret < 0) struct iov_iter iter;
return ret; u64 translated;
int ret;
iov_iter_bvec(&iter, READ, iov, ret, len); ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
len - total_translated, &translated,
iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
if (ret == -ENOBUFS)
ret = ARRAY_SIZE(iov);
else if (ret < 0)
return ret;
ret = copy_from_iter(dst, len, &iter); iov_iter_bvec(&iter, READ, iov, ret, translated);
return ret; ret = copy_from_iter(dst, translated, &iter);
if (ret < 0)
return ret;
src += translated;
dst += translated;
total_translated += translated;
}
return total_translated;
} }
static inline int copy_to_iotlb(const struct vringh *vrh, void *dst, static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
void *src, size_t len) void *src, size_t len)
{ {
struct iov_iter iter; u64 total_translated = 0;
struct bio_vec iov[16];
int ret;
ret = iotlb_translate(vrh, (u64)(uintptr_t)dst, while (total_translated < len) {
len, iov, 16, VHOST_MAP_WO); struct bio_vec iov[16];
if (ret < 0) struct iov_iter iter;
return ret; u64 translated;
int ret;
ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
len - total_translated, &translated,
iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
if (ret == -ENOBUFS)
ret = ARRAY_SIZE(iov);
else if (ret < 0)
return ret;
iov_iter_bvec(&iter, WRITE, iov, ret, len); iov_iter_bvec(&iter, WRITE, iov, ret, translated);
ret = copy_to_iter(src, translated, &iter);
if (ret < 0)
return ret;
src += translated;
dst += translated;
total_translated += translated;
}
return copy_to_iter(src, len, &iter); return total_translated;
} }
static inline int getu16_iotlb(const struct vringh *vrh, static inline int getu16_iotlb(const struct vringh *vrh,
...@@ -1183,7 +1217,7 @@ static inline int getu16_iotlb(const struct vringh *vrh, ...@@ -1183,7 +1217,7 @@ static inline int getu16_iotlb(const struct vringh *vrh,
int ret; int ret;
/* Atomic read is needed for getu16 */ /* Atomic read is needed for getu16 */
ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
&iov, 1, VHOST_MAP_RO); &iov, 1, VHOST_MAP_RO);
if (ret < 0) if (ret < 0)
return ret; return ret;
...@@ -1204,7 +1238,7 @@ static inline int putu16_iotlb(const struct vringh *vrh, ...@@ -1204,7 +1238,7 @@ static inline int putu16_iotlb(const struct vringh *vrh,
int ret; int ret;
/* Atomic write is needed for putu16 */ /* Atomic write is needed for putu16 */
ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
&iov, 1, VHOST_MAP_WO); &iov, 1, VHOST_MAP_WO);
if (ret < 0) if (ret < 0)
return ret; return ret;
......
...@@ -35,11 +35,12 @@ if VIRTIO_MENU ...@@ -35,11 +35,12 @@ if VIRTIO_MENU
config VIRTIO_HARDEN_NOTIFICATION config VIRTIO_HARDEN_NOTIFICATION
bool "Harden virtio notification" bool "Harden virtio notification"
depends on BROKEN
help help
Enable this to harden the device notifications and suppress Enable this to harden the device notifications and suppress
those that happen at a time where notifications are illegal. those that happen at a time where notifications are illegal.
Experimental: Note that several drivers still have bugs that Experimental: Note that several drivers still have issues that
may cause crashes or hangs when correct handling of may cause crashes or hangs when correct handling of
notifications is enforced; depending on the subset of notifications is enforced; depending on the subset of
drivers and devices you use, this may or may not work. drivers and devices you use, this may or may not work.
...@@ -126,9 +127,11 @@ config VIRTIO_MEM ...@@ -126,9 +127,11 @@ config VIRTIO_MEM
This driver provides access to virtio-mem paravirtualized memory This driver provides access to virtio-mem paravirtualized memory
devices, allowing to hotplug and hotunplug memory. devices, allowing to hotplug and hotunplug memory.
This driver was only tested under x86-64 and arm64, but should This driver currently only supports x86-64 and arm64. Although it
theoretically work on all architectures that support memory hotplug should compile on other architectures that implement memory
and hotremove. hot(un)plug, architecture-specific and/or common
code changes may be required for virtio-mem, kdump and kexec to work as
expected.
If unsure, say M. If unsure, say M.
......
...@@ -428,7 +428,9 @@ int register_virtio_device(struct virtio_device *dev) ...@@ -428,7 +428,9 @@ int register_virtio_device(struct virtio_device *dev)
goto out; goto out;
dev->index = err; dev->index = err;
dev_set_name(&dev->dev, "virtio%u", dev->index); err = dev_set_name(&dev->dev, "virtio%u", dev->index);
if (err)
goto out_ida_remove;
err = virtio_device_of_init(dev); err = virtio_device_of_init(dev);
if (err) if (err)
......
...@@ -360,7 +360,7 @@ static void vm_synchronize_cbs(struct virtio_device *vdev) ...@@ -360,7 +360,7 @@ static void vm_synchronize_cbs(struct virtio_device *vdev)
static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int index, static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int index,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name, bool ctx) const char *name, u32 size, bool ctx)
{ {
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
struct virtio_mmio_vq_info *info; struct virtio_mmio_vq_info *info;
...@@ -395,14 +395,19 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int in ...@@ -395,14 +395,19 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int in
goto error_new_virtqueue; goto error_new_virtqueue;
} }
if (!size || size > num)
size = num;
/* Create the vring */ /* Create the vring */
vq = vring_create_virtqueue(index, num, VIRTIO_MMIO_VRING_ALIGN, vdev, vq = vring_create_virtqueue(index, size, VIRTIO_MMIO_VRING_ALIGN, vdev,
true, true, ctx, vm_notify, callback, name); true, true, ctx, vm_notify, callback, name);
if (!vq) { if (!vq) {
err = -ENOMEM; err = -ENOMEM;
goto error_new_virtqueue; goto error_new_virtqueue;
} }
vq->num_max = num;
/* Activate the queue */ /* Activate the queue */
writel(virtqueue_get_vring_size(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NUM); writel(virtqueue_get_vring_size(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NUM);
if (vm_dev->version == 1) { if (vm_dev->version == 1) {
...@@ -472,6 +477,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs, ...@@ -472,6 +477,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], struct virtqueue *vqs[],
vq_callback_t *callbacks[], vq_callback_t *callbacks[],
const char * const names[], const char * const names[],
u32 sizes[],
const bool *ctx, const bool *ctx,
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
...@@ -487,6 +493,9 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs, ...@@ -487,6 +493,9 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
if (err) if (err)
return err; return err;
if (of_property_read_bool(vm_dev->pdev->dev.of_node, "wakeup-source"))
enable_irq_wake(irq);
for (i = 0; i < nvqs; ++i) { for (i = 0; i < nvqs; ++i) {
if (!names[i]) { if (!names[i]) {
vqs[i] = NULL; vqs[i] = NULL;
...@@ -494,6 +503,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs, ...@@ -494,6 +503,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
} }
vqs[i] = vm_setup_vq(vdev, queue_idx++, callbacks[i], names[i], vqs[i] = vm_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
sizes ? sizes[i] : 0,
ctx ? ctx[i] : false); ctx ? ctx[i] : false);
if (IS_ERR(vqs[i])) { if (IS_ERR(vqs[i])) {
vm_del_vqs(vdev); vm_del_vqs(vdev);
......
...@@ -174,6 +174,7 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, ...@@ -174,6 +174,7 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index, static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name, const char *name,
u32 size,
bool ctx, bool ctx,
u16 msix_vec) u16 msix_vec)
{ {
...@@ -186,7 +187,7 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int in ...@@ -186,7 +187,7 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int in
if (!info) if (!info)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx, vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, size, ctx,
msix_vec); msix_vec);
if (IS_ERR(vq)) if (IS_ERR(vq))
goto out_info; goto out_info;
...@@ -214,9 +215,15 @@ static void vp_del_vq(struct virtqueue *vq) ...@@ -214,9 +215,15 @@ static void vp_del_vq(struct virtqueue *vq)
struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&vp_dev->lock, flags); /*
list_del(&info->node); * If it fails during re-enable reset vq. This way we won't rejoin
spin_unlock_irqrestore(&vp_dev->lock, flags); * info->node to the queue. Prevent unexpected irqs.
*/
if (!vq->reset) {
spin_lock_irqsave(&vp_dev->lock, flags);
list_del(&info->node);
spin_unlock_irqrestore(&vp_dev->lock, flags);
}
vp_dev->del_vq(info); vp_dev->del_vq(info);
kfree(info); kfree(info);
...@@ -277,7 +284,7 @@ void vp_del_vqs(struct virtio_device *vdev) ...@@ -277,7 +284,7 @@ void vp_del_vqs(struct virtio_device *vdev)
static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[], struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], bool per_vq_vectors, const char * const names[], u32 sizes[], bool per_vq_vectors,
const bool *ctx, const bool *ctx,
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
...@@ -320,8 +327,8 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, ...@@ -320,8 +327,8 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs,
else else
msix_vec = VP_MSIX_VQ_VECTOR; msix_vec = VP_MSIX_VQ_VECTOR;
vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i], vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
ctx ? ctx[i] : false, sizes ? sizes[i] : 0,
msix_vec); ctx ? ctx[i] : false, msix_vec);
if (IS_ERR(vqs[i])) { if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]); err = PTR_ERR(vqs[i]);
goto error_find; goto error_find;
...@@ -351,7 +358,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, ...@@ -351,7 +358,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs,
static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[], struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], const bool *ctx) const char * const names[], u32 sizes[], const bool *ctx)
{ {
struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_device *vp_dev = to_vp_device(vdev);
int i, err, queue_idx = 0; int i, err, queue_idx = 0;
...@@ -373,6 +380,7 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, ...@@ -373,6 +380,7 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs,
continue; continue;
} }
vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i], vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
sizes ? sizes[i] : 0,
ctx ? ctx[i] : false, ctx ? ctx[i] : false,
VIRTIO_MSI_NO_VECTOR); VIRTIO_MSI_NO_VECTOR);
if (IS_ERR(vqs[i])) { if (IS_ERR(vqs[i])) {
...@@ -390,21 +398,21 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, ...@@ -390,21 +398,21 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs,
/* the config->find_vqs() implementation */ /* the config->find_vqs() implementation */
int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[], struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], const bool *ctx, const char * const names[], u32 sizes[], const bool *ctx,
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
int err; int err;
/* Try MSI-X with one vector per queue. */ /* Try MSI-X with one vector per queue. */
err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc); err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, sizes, true, ctx, desc);
if (!err) if (!err)
return 0; return 0;
/* Fallback: MSI-X with one vector for config, one shared for queues. */ /* Fallback: MSI-X with one vector for config, one shared for queues. */
err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc); err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, sizes, false, ctx, desc);
if (!err) if (!err)
return 0; return 0;
/* Finally fall back to regular interrupts. */ /* Finally fall back to regular interrupts. */
return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx); return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, sizes, ctx);
} }
const char *vp_bus_name(struct virtio_device *vdev) const char *vp_bus_name(struct virtio_device *vdev)
......
...@@ -80,6 +80,7 @@ struct virtio_pci_device { ...@@ -80,6 +80,7 @@ struct virtio_pci_device {
unsigned int idx, unsigned int idx,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name, const char *name,
u32 size,
bool ctx, bool ctx,
u16 msix_vec); u16 msix_vec);
void (*del_vq)(struct virtio_pci_vq_info *info); void (*del_vq)(struct virtio_pci_vq_info *info);
...@@ -110,7 +111,7 @@ void vp_del_vqs(struct virtio_device *vdev); ...@@ -110,7 +111,7 @@ void vp_del_vqs(struct virtio_device *vdev);
/* the config->find_vqs() implementation */ /* the config->find_vqs() implementation */
int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[], struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], const bool *ctx, const char * const names[], u32 sizes[], const bool *ctx,
struct irq_affinity *desc); struct irq_affinity *desc);
const char *vp_bus_name(struct virtio_device *vdev); const char *vp_bus_name(struct virtio_device *vdev);
......
...@@ -112,6 +112,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, ...@@ -112,6 +112,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
unsigned int index, unsigned int index,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name, const char *name,
u32 size,
bool ctx, bool ctx,
u16 msix_vec) u16 msix_vec)
{ {
...@@ -125,16 +126,21 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, ...@@ -125,16 +126,21 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
if (!num || vp_legacy_get_queue_enable(&vp_dev->ldev, index)) if (!num || vp_legacy_get_queue_enable(&vp_dev->ldev, index))
return ERR_PTR(-ENOENT); return ERR_PTR(-ENOENT);
if (!size || size > num)
size = num;
info->msix_vector = msix_vec; info->msix_vector = msix_vec;
/* create the vring */ /* create the vring */
vq = vring_create_virtqueue(index, num, vq = vring_create_virtqueue(index, size,
VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev, VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev,
true, false, ctx, true, false, ctx,
vp_notify, callback, name); vp_notify, callback, name);
if (!vq) if (!vq)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
vq->num_max = num;
q_pfn = virtqueue_get_desc_addr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; q_pfn = virtqueue_get_desc_addr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
if (q_pfn >> 32) { if (q_pfn >> 32) {
dev_err(&vp_dev->pci_dev->dev, dev_err(&vp_dev->pci_dev->dev,
......
...@@ -34,6 +34,9 @@ static void vp_transport_features(struct virtio_device *vdev, u64 features) ...@@ -34,6 +34,9 @@ static void vp_transport_features(struct virtio_device *vdev, u64 features)
if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) && if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) &&
pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV)) pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV))
__virtio_set_bit(vdev, VIRTIO_F_SR_IOV); __virtio_set_bit(vdev, VIRTIO_F_SR_IOV);
if (features & BIT_ULL(VIRTIO_F_RING_RESET))
__virtio_set_bit(vdev, VIRTIO_F_RING_RESET);
} }
/* virtio config->finalize_features() implementation */ /* virtio config->finalize_features() implementation */
...@@ -176,6 +179,110 @@ static void vp_reset(struct virtio_device *vdev) ...@@ -176,6 +179,110 @@ static void vp_reset(struct virtio_device *vdev)
vp_synchronize_vectors(vdev); vp_synchronize_vectors(vdev);
} }
static int vp_active_vq(struct virtqueue *vq, u16 msix_vec)
{
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
unsigned long index;
index = vq->index;
/* activate the queue */
vp_modern_set_queue_size(mdev, index, virtqueue_get_vring_size(vq));
vp_modern_queue_address(mdev, index, virtqueue_get_desc_addr(vq),
virtqueue_get_avail_addr(vq),
virtqueue_get_used_addr(vq));
if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
msix_vec = vp_modern_queue_vector(mdev, index, msix_vec);
if (msix_vec == VIRTIO_MSI_NO_VECTOR)
return -EBUSY;
}
return 0;
}
static int vp_modern_disable_vq_and_reset(struct virtqueue *vq)
{
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
struct virtio_pci_vq_info *info;
unsigned long flags;
if (!virtio_has_feature(vq->vdev, VIRTIO_F_RING_RESET))
return -ENOENT;
vp_modern_set_queue_reset(mdev, vq->index);
info = vp_dev->vqs[vq->index];
/* delete vq from irq handler */
spin_lock_irqsave(&vp_dev->lock, flags);
list_del(&info->node);
spin_unlock_irqrestore(&vp_dev->lock, flags);
INIT_LIST_HEAD(&info->node);
#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
__virtqueue_break(vq);
#endif
/* For the case where vq has an exclusive irq, call synchronize_irq() to
* wait for completion.
*
* note: We can't use disable_irq() since it conflicts with the affinity
* managed IRQ that is used by some drivers.
*/
if (vp_dev->per_vq_vectors && info->msix_vector != VIRTIO_MSI_NO_VECTOR)
synchronize_irq(pci_irq_vector(vp_dev->pci_dev, info->msix_vector));
vq->reset = true;
return 0;
}
static int vp_modern_enable_vq_after_reset(struct virtqueue *vq)
{
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
struct virtio_pci_vq_info *info;
unsigned long flags, index;
int err;
if (!vq->reset)
return -EBUSY;
index = vq->index;
info = vp_dev->vqs[index];
if (vp_modern_get_queue_reset(mdev, index))
return -EBUSY;
if (vp_modern_get_queue_enable(mdev, index))
return -EBUSY;
err = vp_active_vq(vq, info->msix_vector);
if (err)
return err;
if (vq->callback) {
spin_lock_irqsave(&vp_dev->lock, flags);
list_add(&info->node, &vp_dev->virtqueues);
spin_unlock_irqrestore(&vp_dev->lock, flags);
} else {
INIT_LIST_HEAD(&info->node);
}
#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
__virtqueue_unbreak(vq);
#endif
vp_modern_set_queue_enable(&vp_dev->mdev, index, true);
vq->reset = false;
return 0;
}
static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
{ {
return vp_modern_config_vector(&vp_dev->mdev, vector); return vp_modern_config_vector(&vp_dev->mdev, vector);
...@@ -186,6 +293,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, ...@@ -186,6 +293,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
unsigned int index, unsigned int index,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name, const char *name,
u32 size,
bool ctx, bool ctx,
u16 msix_vec) u16 msix_vec)
{ {
...@@ -203,47 +311,39 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, ...@@ -203,47 +311,39 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
if (!num || vp_modern_get_queue_enable(mdev, index)) if (!num || vp_modern_get_queue_enable(mdev, index))
return ERR_PTR(-ENOENT); return ERR_PTR(-ENOENT);
if (num & (num - 1)) { if (!size || size > num)
dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num); size = num;
if (size & (size - 1)) {
dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", size);
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
info->msix_vector = msix_vec; info->msix_vector = msix_vec;
/* create the vring */ /* create the vring */
vq = vring_create_virtqueue(index, num, vq = vring_create_virtqueue(index, size,
SMP_CACHE_BYTES, &vp_dev->vdev, SMP_CACHE_BYTES, &vp_dev->vdev,
true, true, ctx, true, true, ctx,
vp_notify, callback, name); vp_notify, callback, name);
if (!vq) if (!vq)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
/* activate the queue */ vq->num_max = num;
vp_modern_set_queue_size(mdev, index, virtqueue_get_vring_size(vq));
vp_modern_queue_address(mdev, index, virtqueue_get_desc_addr(vq), err = vp_active_vq(vq, msix_vec);
virtqueue_get_avail_addr(vq), if (err)
virtqueue_get_used_addr(vq)); goto err;
vq->priv = (void __force *)vp_modern_map_vq_notify(mdev, index, NULL); vq->priv = (void __force *)vp_modern_map_vq_notify(mdev, index, NULL);
if (!vq->priv) { if (!vq->priv) {
err = -ENOMEM; err = -ENOMEM;
goto err_map_notify; goto err;
}
if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
msix_vec = vp_modern_queue_vector(mdev, index, msix_vec);
if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
err = -EBUSY;
goto err_assign_vector;
}
} }
return vq; return vq;
err_assign_vector: err:
if (!mdev->notify_base)
pci_iounmap(mdev->pci_dev, (void __iomem __force *)vq->priv);
err_map_notify:
vring_del_virtqueue(vq); vring_del_virtqueue(vq);
return ERR_PTR(err); return ERR_PTR(err);
} }
...@@ -251,12 +351,15 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, ...@@ -251,12 +351,15 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned int nvqs, static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], struct virtqueue *vqs[],
vq_callback_t *callbacks[], vq_callback_t *callbacks[],
const char * const names[], const bool *ctx, const char * const names[],
u32 sizes[],
const bool *ctx,
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtqueue *vq; struct virtqueue *vq;
int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc); int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, sizes, ctx,
desc);
if (rc) if (rc)
return rc; return rc;
...@@ -401,6 +504,8 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { ...@@ -401,6 +504,8 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
.set_vq_affinity = vp_set_vq_affinity, .set_vq_affinity = vp_set_vq_affinity,
.get_vq_affinity = vp_get_vq_affinity, .get_vq_affinity = vp_get_vq_affinity,
.get_shm_region = vp_get_shm_region, .get_shm_region = vp_get_shm_region,
.disable_vq_and_reset = vp_modern_disable_vq_and_reset,
.enable_vq_after_reset = vp_modern_enable_vq_after_reset,
}; };
static const struct virtio_config_ops virtio_pci_config_ops = { static const struct virtio_config_ops virtio_pci_config_ops = {
...@@ -419,6 +524,8 @@ static const struct virtio_config_ops virtio_pci_config_ops = { ...@@ -419,6 +524,8 @@ static const struct virtio_config_ops virtio_pci_config_ops = {
.set_vq_affinity = vp_set_vq_affinity, .set_vq_affinity = vp_set_vq_affinity,
.get_vq_affinity = vp_get_vq_affinity, .get_vq_affinity = vp_get_vq_affinity,
.get_shm_region = vp_get_shm_region, .get_shm_region = vp_get_shm_region,
.disable_vq_and_reset = vp_modern_disable_vq_and_reset,
.enable_vq_after_reset = vp_modern_enable_vq_after_reset,
}; };
/* the PCI probing function */ /* the PCI probing function */
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <linux/virtio_pci_modern.h> #include <linux/virtio_pci_modern.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/delay.h>
/* /*
* vp_modern_map_capability - map a part of virtio pci capability * vp_modern_map_capability - map a part of virtio pci capability
...@@ -474,6 +475,44 @@ void vp_modern_set_status(struct virtio_pci_modern_device *mdev, ...@@ -474,6 +475,44 @@ void vp_modern_set_status(struct virtio_pci_modern_device *mdev,
} }
EXPORT_SYMBOL_GPL(vp_modern_set_status); EXPORT_SYMBOL_GPL(vp_modern_set_status);
/*
* vp_modern_get_queue_reset - get the queue reset status
* @mdev: the modern virtio-pci device
* @index: queue index
*/
int vp_modern_get_queue_reset(struct virtio_pci_modern_device *mdev, u16 index)
{
struct virtio_pci_modern_common_cfg __iomem *cfg;
cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev->common;
vp_iowrite16(index, &cfg->cfg.queue_select);
return vp_ioread16(&cfg->queue_reset);
}
EXPORT_SYMBOL_GPL(vp_modern_get_queue_reset);
/*
* vp_modern_set_queue_reset - reset the queue
* @mdev: the modern virtio-pci device
* @index: queue index
*/
void vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16 index)
{
struct virtio_pci_modern_common_cfg __iomem *cfg;
cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev->common;
vp_iowrite16(index, &cfg->cfg.queue_select);
vp_iowrite16(1, &cfg->queue_reset);
while (vp_ioread16(&cfg->queue_reset))
msleep(1);
while (vp_ioread16(&cfg->cfg.queue_enable))
msleep(1);
}
EXPORT_SYMBOL_GPL(vp_modern_set_queue_reset);
/* /*
* vp_modern_queue_vector - set the MSIX vector for a specific virtqueue * vp_modern_queue_vector - set the MSIX vector for a specific virtqueue
* @mdev: the modern virtio-pci device * @mdev: the modern virtio-pci device
......
...@@ -131,7 +131,7 @@ static irqreturn_t virtio_vdpa_virtqueue_cb(void *private) ...@@ -131,7 +131,7 @@ static irqreturn_t virtio_vdpa_virtqueue_cb(void *private)
static struct virtqueue * static struct virtqueue *
virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name, bool ctx) const char *name, u32 size, bool ctx)
{ {
struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev); struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev);
struct vdpa_device *vdpa = vd_get_vdpa(vdev); struct vdpa_device *vdpa = vd_get_vdpa(vdev);
...@@ -168,14 +168,17 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, ...@@ -168,14 +168,17 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
goto error_new_virtqueue; goto error_new_virtqueue;
} }
if (!size || size > max_num)
size = max_num;
if (ops->get_vq_num_min) if (ops->get_vq_num_min)
min_num = ops->get_vq_num_min(vdpa); min_num = ops->get_vq_num_min(vdpa);
may_reduce_num = (max_num == min_num) ? false : true; may_reduce_num = (size == min_num) ? false : true;
/* Create the vring */ /* Create the vring */
align = ops->get_vq_align(vdpa); align = ops->get_vq_align(vdpa);
vq = vring_create_virtqueue(index, max_num, align, vdev, vq = vring_create_virtqueue(index, size, align, vdev,
true, may_reduce_num, ctx, true, may_reduce_num, ctx,
virtio_vdpa_notify, callback, name); virtio_vdpa_notify, callback, name);
if (!vq) { if (!vq) {
...@@ -183,6 +186,8 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, ...@@ -183,6 +186,8 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
goto error_new_virtqueue; goto error_new_virtqueue;
} }
vq->num_max = max_num;
/* Setup virtqueue callback */ /* Setup virtqueue callback */
cb.callback = callback ? virtio_vdpa_virtqueue_cb : NULL; cb.callback = callback ? virtio_vdpa_virtqueue_cb : NULL;
cb.private = info; cb.private = info;
...@@ -267,6 +272,7 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs, ...@@ -267,6 +272,7 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], struct virtqueue *vqs[],
vq_callback_t *callbacks[], vq_callback_t *callbacks[],
const char * const names[], const char * const names[],
u32 sizes[],
const bool *ctx, const bool *ctx,
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
...@@ -282,9 +288,9 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs, ...@@ -282,9 +288,9 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
continue; continue;
} }
vqs[i] = virtio_vdpa_setup_vq(vdev, queue_idx++, vqs[i] = virtio_vdpa_setup_vq(vdev, queue_idx++, callbacks[i],
callbacks[i], names[i], ctx ? names[i], sizes ? sizes[i] : 0,
ctx[i] : false); ctx ? ctx[i] : false);
if (IS_ERR(vqs[i])) { if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]); err = PTR_ERR(vqs[i]);
goto err_setup_vq; goto err_setup_vq;
......
...@@ -150,6 +150,14 @@ enum { ...@@ -150,6 +150,14 @@ enum {
MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR = 0x3, MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR = 0x3,
}; };
/* This indicates that the object was not created or has already
 * been destroyed. It is safe to assume that this object will never
* have so many states
*/
enum {
MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
};
enum { enum {
MLX5_RQTC_LIST_Q_TYPE_RQ = 0x0, MLX5_RQTC_LIST_Q_TYPE_RQ = 0x0,
MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q = 0x1, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q = 0x1,
......
...@@ -597,7 +597,7 @@ struct rproc_subdev { ...@@ -597,7 +597,7 @@ struct rproc_subdev {
/** /**
* struct rproc_vring - remoteproc vring state * struct rproc_vring - remoteproc vring state
* @va: virtual address * @va: virtual address
* @len: length, in bytes * @num: vring size
* @da: device address * @da: device address
* @align: vring alignment * @align: vring alignment
* @notifyid: rproc-specific unique vring index * @notifyid: rproc-specific unique vring index
...@@ -606,7 +606,7 @@ struct rproc_subdev { ...@@ -606,7 +606,7 @@ struct rproc_subdev {
*/ */
struct rproc_vring { struct rproc_vring {
void *va; void *va;
int len; int num;
u32 da; u32 da;
u32 align; u32 align;
int notifyid; int notifyid;
......
...@@ -218,6 +218,9 @@ struct vdpa_map_file { ...@@ -218,6 +218,9 @@ struct vdpa_map_file {
* @reset: Reset device * @reset: Reset device
* @vdev: vdpa device * @vdev: vdpa device
* Returns integer: success (0) or error (< 0) * Returns integer: success (0) or error (< 0)
* @suspend: Suspend or resume the device (optional)
* @vdev: vdpa device
* Returns integer: success (0) or error (< 0)
* @get_config_size: Get the size of the configuration space includes * @get_config_size: Get the size of the configuration space includes
* fields that are conditional on feature bits. * fields that are conditional on feature bits.
* @vdev: vdpa device * @vdev: vdpa device
...@@ -319,6 +322,7 @@ struct vdpa_config_ops { ...@@ -319,6 +322,7 @@ struct vdpa_config_ops {
u8 (*get_status)(struct vdpa_device *vdev); u8 (*get_status)(struct vdpa_device *vdev);
void (*set_status)(struct vdpa_device *vdev, u8 status); void (*set_status)(struct vdpa_device *vdev, u8 status);
int (*reset)(struct vdpa_device *vdev); int (*reset)(struct vdpa_device *vdev);
int (*suspend)(struct vdpa_device *vdev);
size_t (*get_config_size)(struct vdpa_device *vdev); size_t (*get_config_size)(struct vdpa_device *vdev);
void (*get_config)(struct vdpa_device *vdev, unsigned int offset, void (*get_config)(struct vdpa_device *vdev, unsigned int offset,
void *buf, unsigned int len); void *buf, unsigned int len);
......
...@@ -19,6 +19,8 @@ ...@@ -19,6 +19,8 @@
* @priv: a pointer for the virtqueue implementation to use. * @priv: a pointer for the virtqueue implementation to use.
* @index: the zero-based ordinal number for this queue. * @index: the zero-based ordinal number for this queue.
* @num_free: number of elements we expect to be able to fit. * @num_free: number of elements we expect to be able to fit.
* @num_max: the maximum number of elements supported by the device.
* @reset: vq is in reset state or not.
* *
* A note on @num_free: with indirect buffers, each buffer needs one * A note on @num_free: with indirect buffers, each buffer needs one
* element in the queue, otherwise a buffer will need one element per * element in the queue, otherwise a buffer will need one element per
...@@ -31,7 +33,9 @@ struct virtqueue { ...@@ -31,7 +33,9 @@ struct virtqueue {
struct virtio_device *vdev; struct virtio_device *vdev;
unsigned int index; unsigned int index;
unsigned int num_free; unsigned int num_free;
unsigned int num_max;
void *priv; void *priv;
bool reset;
}; };
int virtqueue_add_outbuf(struct virtqueue *vq, int virtqueue_add_outbuf(struct virtqueue *vq,
...@@ -89,6 +93,9 @@ dma_addr_t virtqueue_get_desc_addr(struct virtqueue *vq); ...@@ -89,6 +93,9 @@ dma_addr_t virtqueue_get_desc_addr(struct virtqueue *vq);
dma_addr_t virtqueue_get_avail_addr(struct virtqueue *vq); dma_addr_t virtqueue_get_avail_addr(struct virtqueue *vq);
dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq); dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
int virtqueue_resize(struct virtqueue *vq, u32 num,
void (*recycle)(struct virtqueue *vq, void *buf));
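A minimal driver-side sketch of the new resize API (illustrative only, assuming the usual <linux/virtio.h> and <linux/slab.h> includes; my_recycle()/my_shrink_rx() are made-up names and error handling is reduced to the essentials):
/* Called once for every buffer that was still queued on the old ring. */
static void my_recycle(struct virtqueue *vq, void *buf)
{
	kfree(buf);
}
static int my_shrink_rx(struct virtqueue *vq, u32 new_num)
{
	/* num_max is the ceiling the transport reported for this queue. */
	if (new_num > vq->num_max)
		return -EINVAL;
	/*
	 * virtqueue_resize() quiesces the queue via the transport's
	 * disable_vq_and_reset()/enable_vq_after_reset() ops before and
	 * after reallocating the ring, handing leftover buffers to the
	 * recycle callback.
	 */
	return virtqueue_resize(vq, new_num, my_recycle);
}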
/** /**
* virtio_device - representation of a device using virtio * virtio_device - representation of a device using virtio
* @index: unique position on the virtio bus * @index: unique position on the virtio bus
...@@ -133,6 +140,9 @@ bool is_virtio_device(struct device *dev); ...@@ -133,6 +140,9 @@ bool is_virtio_device(struct device *dev);
void virtio_break_device(struct virtio_device *dev); void virtio_break_device(struct virtio_device *dev);
void __virtio_unbreak_device(struct virtio_device *dev); void __virtio_unbreak_device(struct virtio_device *dev);
void __virtqueue_break(struct virtqueue *_vq);
void __virtqueue_unbreak(struct virtqueue *_vq);
void virtio_config_changed(struct virtio_device *dev); void virtio_config_changed(struct virtio_device *dev);
#ifdef CONFIG_PM_SLEEP #ifdef CONFIG_PM_SLEEP
int virtio_device_freeze(struct virtio_device *dev); int virtio_device_freeze(struct virtio_device *dev);
......
...@@ -55,6 +55,7 @@ struct virtio_shm_region { ...@@ -55,6 +55,7 @@ struct virtio_shm_region {
* include a NULL entry for vqs that do not need a callback * include a NULL entry for vqs that do not need a callback
* names: array of virtqueue names (mainly for debugging) * names: array of virtqueue names (mainly for debugging)
* include a NULL entry for vqs unused by driver * include a NULL entry for vqs unused by driver
* sizes: array of virtqueue sizes
* Returns 0 on success or error status * Returns 0 on success or error status
* @del_vqs: free virtqueues found by find_vqs(). * @del_vqs: free virtqueues found by find_vqs().
* @synchronize_cbs: synchronize with the virtqueue callbacks (optional) * @synchronize_cbs: synchronize with the virtqueue callbacks (optional)
...@@ -78,6 +79,18 @@ struct virtio_shm_region { ...@@ -78,6 +79,18 @@ struct virtio_shm_region {
* @set_vq_affinity: set the affinity for a virtqueue (optional). * @set_vq_affinity: set the affinity for a virtqueue (optional).
* @get_vq_affinity: get the affinity for a virtqueue (optional). * @get_vq_affinity: get the affinity for a virtqueue (optional).
* @get_shm_region: get a shared memory region based on the index. * @get_shm_region: get a shared memory region based on the index.
* @disable_vq_and_reset: reset a queue individually (optional).
* vq: the virtqueue
* Returns 0 on success or error status
* disable_vq_and_reset will guarantee that the callbacks are disabled and
* synchronized.
* Except for the callback, the caller should guarantee that the vring is
 * not accessed by any virtqueue function.
* @enable_vq_after_reset: enable a reset queue
* vq: the virtqueue
* Returns 0 on success or error status
* If disable_vq_and_reset is set, then enable_vq_after_reset must also be
* set.
*/ */
typedef void vq_callback_t(struct virtqueue *); typedef void vq_callback_t(struct virtqueue *);
struct virtio_config_ops { struct virtio_config_ops {
...@@ -91,7 +104,9 @@ struct virtio_config_ops { ...@@ -91,7 +104,9 @@ struct virtio_config_ops {
void (*reset)(struct virtio_device *vdev); void (*reset)(struct virtio_device *vdev);
int (*find_vqs)(struct virtio_device *, unsigned nvqs, int (*find_vqs)(struct virtio_device *, unsigned nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[], struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], const bool *ctx, const char * const names[],
u32 sizes[],
const bool *ctx,
struct irq_affinity *desc); struct irq_affinity *desc);
void (*del_vqs)(struct virtio_device *); void (*del_vqs)(struct virtio_device *);
void (*synchronize_cbs)(struct virtio_device *); void (*synchronize_cbs)(struct virtio_device *);
...@@ -104,6 +119,8 @@ struct virtio_config_ops { ...@@ -104,6 +119,8 @@ struct virtio_config_ops {
int index); int index);
bool (*get_shm_region)(struct virtio_device *vdev, bool (*get_shm_region)(struct virtio_device *vdev,
struct virtio_shm_region *region, u8 id); struct virtio_shm_region *region, u8 id);
int (*disable_vq_and_reset)(struct virtqueue *vq);
int (*enable_vq_after_reset)(struct virtqueue *vq);
}; };
/* If driver didn't advertise the feature, it will never appear. */ /* If driver didn't advertise the feature, it will never appear. */
...@@ -198,7 +215,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, ...@@ -198,7 +215,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
const char *names[] = { n }; const char *names[] = { n };
struct virtqueue *vq; struct virtqueue *vq;
int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL, int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL,
NULL); NULL, NULL);
if (err < 0) if (err < 0)
return ERR_PTR(err); return ERR_PTR(err);
return vq; return vq;
...@@ -210,7 +227,8 @@ int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, ...@@ -210,7 +227,8 @@ int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
const char * const names[], const char * const names[],
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc); return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL,
NULL, desc);
} }
static inline static inline
...@@ -219,8 +237,20 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, ...@@ -219,8 +237,20 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs,
const char * const names[], const bool *ctx, const char * const names[], const bool *ctx,
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL,
desc); ctx, desc);
}
static inline
int virtio_find_vqs_ctx_size(struct virtio_device *vdev, u32 nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char * const names[],
u32 sizes[],
const bool *ctx, struct irq_affinity *desc)
{
return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, sizes,
ctx, desc);
} }
/** /**
......
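A sketch of how a driver might use the per-queue size hint through the new virtio_find_vqs_ctx_size() wrapper above; the queue names, callbacks, and the 256-entry cap are illustrative, and a size of 0 is assumed here to mean "no preference":

#include <linux/virtio.h>
#include <linux/virtio_config.h>

static int example_setup_vqs(struct virtio_device *vdev,
			     struct virtqueue *vqs[2],
			     vq_callback_t *rx_cb, vq_callback_t *tx_cb)
{
	vq_callback_t *callbacks[] = { rx_cb, tx_cb };
	const char * const names[] = { "rx", "tx" };
	u32 sizes[] = { 0, 256 };	/* assumed: 0 lets the transport decide */

	return virtio_find_vqs_ctx_size(vdev, 2, vqs, callbacks, names,
					sizes, NULL, NULL);
}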
...@@ -5,6 +5,13 @@ ...@@ -5,6 +5,13 @@
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/virtio_pci.h> #include <linux/virtio_pci.h>
struct virtio_pci_modern_common_cfg {
struct virtio_pci_common_cfg cfg;
__le16 queue_notify_data; /* read-write */
__le16 queue_reset; /* read-write */
};
struct virtio_pci_modern_device { struct virtio_pci_modern_device {
struct pci_dev *pci_dev; struct pci_dev *pci_dev;
...@@ -106,4 +113,6 @@ void __iomem * vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev, ...@@ -106,4 +113,6 @@ void __iomem * vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev,
u16 index, resource_size_t *pa); u16 index, resource_size_t *pa);
int vp_modern_probe(struct virtio_pci_modern_device *mdev); int vp_modern_probe(struct virtio_pci_modern_device *mdev);
void vp_modern_remove(struct virtio_pci_modern_device *mdev); void vp_modern_remove(struct virtio_pci_modern_device *mdev);
int vp_modern_get_queue_reset(struct virtio_pci_modern_device *mdev, u16 index);
void vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16 index);
#endif #endif
...@@ -76,16 +76,6 @@ struct virtqueue *vring_create_virtqueue(unsigned int index, ...@@ -76,16 +76,6 @@ struct virtqueue *vring_create_virtqueue(unsigned int index,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name); const char *name);
/* Creates a virtqueue with a custom layout. */
struct virtqueue *__vring_new_virtqueue(unsigned int index,
struct vring vring,
struct virtio_device *vdev,
bool weak_barriers,
bool ctx,
bool (*notify)(struct virtqueue *),
void (*callback)(struct virtqueue *),
const char *name);
/* /*
* Creates a virtqueue with a standard layout but a caller-allocated * Creates a virtqueue with a standard layout but a caller-allocated
* ring. * ring.
......
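For reference, the caller-allocated-ring constructor that keeps its declaration here looks roughly like this in use, mirroring the tools/virtio change at the end of this series; the 4096-byte alignment and the handler names are illustrative:

#include <linux/virtio.h>
#include <linux/virtio_ring.h>

static struct virtqueue *example_new_vq(struct virtio_device *vdev,
					unsigned int index, unsigned int num,
					void *pages, /* vring_size(num, 4096) bytes, zeroed */
					bool (*notify)(struct virtqueue *),
					void (*callback)(struct virtqueue *))
{
	return vring_new_virtqueue(index, num, 4096 /* vring_align */, vdev,
				   true /* weak_barriers */, false /* ctx */,
				   pages, notify, callback, "example");
}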
...@@ -210,6 +210,53 @@ struct vduse_vq_eventfd { ...@@ -210,6 +210,53 @@ struct vduse_vq_eventfd {
*/ */
#define VDUSE_VQ_INJECT_IRQ _IOW(VDUSE_BASE, 0x17, __u32) #define VDUSE_VQ_INJECT_IRQ _IOW(VDUSE_BASE, 0x17, __u32)
/**
* struct vduse_iova_umem - userspace memory configuration for one IOVA region
 * @uaddr: start address of the userspace memory; it must be page-aligned
* @iova: start of the IOVA region
* @size: size of the IOVA region
* @reserved: for future use, needs to be initialized to zero
*
* Structure used by VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM
* ioctls to register/de-register userspace memory for IOVA regions
*/
struct vduse_iova_umem {
__u64 uaddr;
__u64 iova;
__u64 size;
__u64 reserved[3];
};
/* Register userspace memory for IOVA regions */
#define VDUSE_IOTLB_REG_UMEM _IOW(VDUSE_BASE, 0x18, struct vduse_iova_umem)
/* De-register the userspace memory. The caller should set the iova and size fields. */
#define VDUSE_IOTLB_DEREG_UMEM _IOW(VDUSE_BASE, 0x19, struct vduse_iova_umem)
/**
* struct vduse_iova_info - information of one IOVA region
* @start: start of the IOVA region
 * @last: last address of the IOVA region
 * @capability: capability of the IOVA region
* @reserved: for future use, needs to be initialized to zero
*
* Structure used by VDUSE_IOTLB_GET_INFO ioctl to get information of
* one IOVA region.
*/
struct vduse_iova_info {
__u64 start;
__u64 last;
#define VDUSE_IOVA_CAP_UMEM (1 << 0)
__u64 capability;
__u64 reserved[3];
};
/*
* Find the first IOVA region that overlaps with the range [start, last]
* and return some information on it. Caller should set start and last fields.
*/
#define VDUSE_IOTLB_GET_INFO _IOWR(VDUSE_BASE, 0x1a, struct vduse_iova_info)
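A userspace sketch of the new umem/info ioctls working together; dev_fd is assumed to be an already-open /dev/vduse/$NAME descriptor, and backing the whole region with one anonymous mapping (with minimal error handling) is purely illustrative:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/vduse.h>

static int example_back_region_with_umem(int dev_fd)
{
	struct vduse_iova_info info;
	struct vduse_iova_umem umem;
	void *buf;

	memset(&info, 0, sizeof(info));
	info.start = 0;
	info.last = UINT64_MAX;		/* search the whole IOVA space */
	/* On success the kernel fills in the matching region's bounds
	 * and capability bits. */
	if (ioctl(dev_fd, VDUSE_IOTLB_GET_INFO, &info))
		return -1;

	if (!(info.capability & VDUSE_IOVA_CAP_UMEM))
		return -1;		/* region cannot take userspace memory */

	buf = mmap(NULL, info.last - info.start + 1, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return -1;

	memset(&umem, 0, sizeof(umem));
	umem.uaddr = (uintptr_t)buf;	/* must be page-aligned; mmap() is */
	umem.iova = info.start;
	umem.size = info.last - info.start + 1;
	return ioctl(dev_fd, VDUSE_IOTLB_REG_UMEM, &umem);
}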
/* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */ /* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */
/** /**
......
...@@ -171,4 +171,13 @@ ...@@ -171,4 +171,13 @@
#define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \ #define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \
struct vhost_vring_state) struct vhost_vring_state)
/* Suspend a device so it does not process virtqueue requests anymore
*
 * After the ioctl returns, the device must preserve all the necessary state
 * (the virtqueue vring base plus any device-specific state) required to
 * restore it in the future. The device must not change its configuration
 * after that point.
*/
#define VHOST_VDPA_SUSPEND _IO(VHOST_VIRTIO, 0x7D)
#endif #endif
...@@ -161,5 +161,7 @@ struct vhost_vdpa_iova_range { ...@@ -161,5 +161,7 @@ struct vhost_vdpa_iova_range {
* message * message
*/ */
#define VHOST_BACKEND_F_IOTLB_ASID 0x3 #define VHOST_BACKEND_F_IOTLB_ASID 0x3
/* Device can be suspended */
#define VHOST_BACKEND_F_SUSPEND 0x4
#endif #endif
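A userspace sketch (not from the patch) combining the new backend feature bit with the suspend ioctl; vdpa_fd is assumed to be an already-open /dev/vhost-vdpa-* descriptor and the error handling is illustrative:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

static int example_suspend(int vdpa_fd)
{
	uint64_t features = 0;

	if (ioctl(vdpa_fd, VHOST_GET_BACKEND_FEATURES, &features))
		return -1;
	if (!(features & (1ULL << VHOST_BACKEND_F_SUSPEND)))
		return -1;	/* device cannot be suspended */

	/* After this returns, the device stops processing virtqueues and
	 * preserves its state (vring bases etc.) for a later restore. */
	return ioctl(vdpa_fd, VHOST_VDPA_SUSPEND);
}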
...@@ -52,7 +52,7 @@ ...@@ -52,7 +52,7 @@
* rest are per-device feature bits. * rest are per-device feature bits.
*/ */
#define VIRTIO_TRANSPORT_F_START 28 #define VIRTIO_TRANSPORT_F_START 28
#define VIRTIO_TRANSPORT_F_END 38 #define VIRTIO_TRANSPORT_F_END 41
#ifndef VIRTIO_CONFIG_NO_LEGACY #ifndef VIRTIO_CONFIG_NO_LEGACY
/* Do we get callbacks when the ring is completely used, even if we've /* Do we get callbacks when the ring is completely used, even if we've
...@@ -98,4 +98,9 @@ ...@@ -98,4 +98,9 @@
* Does the device support Single Root I/O Virtualization? * Does the device support Single Root I/O Virtualization?
*/ */
#define VIRTIO_F_SR_IOV 37 #define VIRTIO_F_SR_IOV 37
/*
* This feature indicates that the driver can reset a queue individually.
*/
#define VIRTIO_F_RING_RESET 40
#endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */ #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */
...@@ -56,7 +56,7 @@ ...@@ -56,7 +56,7 @@
#define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow
* Steering */ * Steering */
#define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */
#define VIRTIO_NET_F_NOTF_COAL 53 /* Guest can handle notifications coalescing */
#define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */
#define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */
#define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */
...@@ -355,4 +355,36 @@ struct virtio_net_hash_config { ...@@ -355,4 +355,36 @@ struct virtio_net_hash_config {
#define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 #define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5
#define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0
/*
* Control notifications coalescing.
*
* Request the device to change the notifications coalescing parameters.
*
* Available with the VIRTIO_NET_F_NOTF_COAL feature bit.
*/
#define VIRTIO_NET_CTRL_NOTF_COAL 6
/*
 * Set the tx-usecs/tx-max-packets parameters.
* tx-usecs - Maximum number of usecs to delay a TX notification.
* tx-max-packets - Maximum number of packets to send before a TX notification.
*/
struct virtio_net_ctrl_coal_tx {
__le32 tx_max_packets;
__le32 tx_usecs;
};
#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET 0
/*
 * Set the rx-usecs/rx-max-packets parameters.
 * rx-usecs - Maximum number of usecs to delay an RX notification.
 * rx-max-packets - Maximum number of packets to receive before an RX notification.
*/
struct virtio_net_ctrl_coal_rx {
__le32 rx_max_packets;
__le32 rx_usecs;
};
#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET 1
#endif /* _UAPI_LINUX_VIRTIO_NET_H */ #endif /* _UAPI_LINUX_VIRTIO_NET_H */
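A sketch of the payload a driver would place on the control virtqueue to set TX coalescing; the device-context type and send helper below are placeholders (the in-tree virtio_net driver uses its internal virtnet_send_command() and DMA-safe buffers rather than the stack), and the values are illustrative:

#include <linux/kernel.h>
#include <asm/byteorder.h>
#include <linux/scatterlist.h>
#include <linux/virtio_net.h>

/* Placeholders for the driver's own context and control-vq send helper. */
struct example_dev;
bool example_send_ctrl_cmd(struct example_dev *dev, u8 cls, u8 cmd,
			   struct scatterlist *out);

static bool example_set_tx_coalescing(struct example_dev *dev,
				      u32 max_packets, u32 usecs)
{
	struct virtio_net_ctrl_coal_tx coal_tx;
	struct scatterlist sg;

	coal_tx.tx_max_packets = cpu_to_le32(max_packets);
	coal_tx.tx_usecs = cpu_to_le32(usecs);
	sg_init_one(&sg, &coal_tx, sizeof(coal_tx));

	/* class VIRTIO_NET_CTRL_NOTF_COAL, command ..._TX_SET, one out sg */
	return example_send_ctrl_cmd(dev, VIRTIO_NET_CTRL_NOTF_COAL,
				     VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, &sg);
}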
...@@ -202,6 +202,8 @@ struct virtio_pci_cfg_cap { ...@@ -202,6 +202,8 @@ struct virtio_pci_cfg_cap {
#define VIRTIO_PCI_COMMON_Q_AVAILHI 44 #define VIRTIO_PCI_COMMON_Q_AVAILHI 44
#define VIRTIO_PCI_COMMON_Q_USEDLO 48 #define VIRTIO_PCI_COMMON_Q_USEDLO 48
#define VIRTIO_PCI_COMMON_Q_USEDHI 52 #define VIRTIO_PCI_COMMON_Q_USEDHI 52
#define VIRTIO_PCI_COMMON_Q_NDATA 56
#define VIRTIO_PCI_COMMON_Q_RESET 58
#endif /* VIRTIO_PCI_NO_MODERN */ #endif /* VIRTIO_PCI_NO_MODERN */
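A layout sanity sketch (assumed placement, not from the patch): the two new offsets should coincide with the fields appended in struct virtio_pci_modern_common_cfg earlier in this series, since struct virtio_pci_common_cfg occupies 56 bytes:

#include <linux/build_bug.h>
#include <linux/stddef.h>
#include <linux/virtio_pci.h>
#include <linux/virtio_pci_modern.h>

static inline void example_check_common_cfg_layout(void)
{
	BUILD_BUG_ON(offsetof(struct virtio_pci_modern_common_cfg,
			      queue_notify_data) != VIRTIO_PCI_COMMON_Q_NDATA);
	BUILD_BUG_ON(offsetof(struct virtio_pci_modern_common_cfg,
			      queue_reset) != VIRTIO_PCI_COMMON_Q_RESET);
}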
......
...@@ -29,7 +29,6 @@ ...@@ -29,7 +29,6 @@
#define READ 0 #define READ 0
#define WRITE 1 #define WRITE 1
typedef unsigned long long phys_addr_t;
typedef unsigned long long dma_addr_t; typedef unsigned long long dma_addr_t;
typedef size_t __kernel_size_t; typedef size_t __kernel_size_t;
typedef unsigned int __wsum; typedef unsigned int __wsum;
...@@ -136,6 +135,7 @@ static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t ...@@ -136,6 +135,7 @@ static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t
#endif #endif
#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define dev_warn_once(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define min(x, y) ({ \ #define min(x, y) ({ \
typeof(x) _min1 = (x); \ typeof(x) _min1 = (x); \
......
#include <limits.h>
#include "../../../include/linux/vringh.h" #include "../../../include/linux/vringh.h"
...@@ -102,8 +102,8 @@ static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev) ...@@ -102,8 +102,8 @@ static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev)
memset(info->ring, 0, vring_size(num, 4096)); memset(info->ring, 0, vring_size(num, 4096));
vring_init(&info->vring, num, info->ring, 4096); vring_init(&info->vring, num, info->ring, 4096);
info->vq = __vring_new_virtqueue(info->idx, info->vring, vdev, true, info->vq = vring_new_virtqueue(info->idx, num, 4096, vdev, true, false,
false, vq_notify, vq_callback, "test"); info->ring, vq_notify, vq_callback, "test");
assert(info->vq); assert(info->vq);
info->vq->priv = info; info->vq->priv = info;
} }
......