Commit 7a53e17a authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:

 - A huge patchset supporting vq resize using the new vq reset
   capability

 - Features, fixes, and cleanups all over the place

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (88 commits)
  vdpa/mlx5: Fix possible uninitialized return value
  vdpa_sim_blk: add support for discard and write-zeroes
  vdpa_sim_blk: add support for VIRTIO_BLK_T_FLUSH
  vdpa_sim_blk: make vdpasim_blk_check_range usable by other requests
  vdpa_sim_blk: check if sector is 0 for commands other than read or write
  vdpa_sim: Implement suspend vdpa op
  vhost-vdpa: uAPI to suspend the device
  vhost-vdpa: introduce SUSPEND backend feature bit
  vdpa: Add suspend operation
  virtio-blk: Avoid use-after-free on suspend/resume
  virtio_vdpa: support the arg sizes of find_vqs()
  vhost-vdpa: Call ida_simple_remove() when failed
  vDPA: fix 'cast to restricted le16' warnings in vdpa.c
  vDPA: !FEATURES_OK should not block querying device config space
  vDPA/ifcvf: support userspace to query features and MQ of a management device
  vDPA/ifcvf: get_config_size should return a value no greater than dev implementation
  vhost scsi: Allow user to control num virtqueues
  vhost-scsi: Fix max number of virtqueues
  vdpa/mlx5: Support different address spaces for control and data
  vdpa/mlx5: Implement susupend virtqueue callback
  ...
parents 999324f5 93e530d2
......@@ -33,6 +33,10 @@ properties:
description: Required for devices making accesses thru an IOMMU.
maxItems: 1
wakeup-source:
type: boolean
description: Required for setting irq of a virtio_mmio device as wakeup source.
required:
- compatible
- reg
......
......@@ -958,6 +958,7 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
goto error_create;
}
vq->priv = info;
vq->num_max = num;
num = virtqueue_get_vring_size(vq);
if (vu_dev->protocol_features &
......@@ -1010,7 +1011,7 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], const bool *ctx,
const char * const names[], u32 sizes[], const bool *ctx,
struct irq_affinity *desc)
{
struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
......
......@@ -101,6 +101,14 @@ static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
}
}
static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
{
struct virtio_blk *vblk = hctx->queue->queuedata;
struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
return vq;
}
static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
{
struct scatterlist hdr, status, *sgs[3];
......@@ -416,7 +424,7 @@ static void virtio_queue_rqs(struct request **rqlist)
struct request *requeue_list = NULL;
rq_list_for_each_safe(rqlist, req, next) {
struct virtio_blk_vq *vq = req->mq_hctx->driver_data;
struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
bool kick;
if (!virtblk_prep_rq_batch(req)) {
......@@ -837,7 +845,7 @@ static void virtblk_complete_batch(struct io_comp_batch *iob)
static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
struct virtio_blk *vblk = hctx->queue->queuedata;
struct virtio_blk_vq *vq = hctx->driver_data;
struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
struct virtblk_req *vbr;
unsigned long flags;
unsigned int len;
......@@ -862,22 +870,10 @@ static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
return found;
}
static int virtblk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
struct virtio_blk *vblk = data;
struct virtio_blk_vq *vq = &vblk->vqs[hctx_idx];
WARN_ON(vblk->tag_set.tags[hctx_idx] != hctx->tags);
hctx->driver_data = vq;
return 0;
}
static const struct blk_mq_ops virtio_mq_ops = {
.queue_rq = virtio_queue_rq,
.queue_rqs = virtio_queue_rqs,
.commit_rqs = virtio_commit_rqs,
.init_hctx = virtblk_init_hctx,
.complete = virtblk_request_done,
.map_queues = virtblk_map_queues,
.poll = virtblk_poll,
......
This diff is collapsed.
......@@ -81,17 +81,24 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
ndr_desc.res = &res;
ndr_desc.numa_node = nid;
ndr_desc.flush = async_pmem_flush;
ndr_desc.provider_data = vdev;
set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
/*
* The NVDIMM region could be available before the
* virtio_device_ready() that is called by
* virtio_dev_probe(), so we set device ready here.
*/
virtio_device_ready(vdev);
nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc);
if (!nd_region) {
dev_err(&vdev->dev, "failed to create nvdimm region\n");
err = -ENXIO;
goto out_nd;
}
nd_region->provider_data = dev_to_virtio(nd_region->dev.parent->parent);
return 0;
out_nd:
virtio_reset_device(vdev);
nvdimm_bus_unregister(vpmem->nvdimm_bus);
out_vq:
vdev->config->del_vqs(vdev);
......
......@@ -928,6 +928,7 @@ static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char * const names[],
u32 sizes[],
const bool *ctx,
struct irq_affinity *desc)
{
......@@ -959,6 +960,8 @@ static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
goto error;
}
vq->num_max = vring->num;
vqs[i] = vq;
vring->vq = vq;
vq->priv = vring;
......
......@@ -335,7 +335,7 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i)
size_t size;
/* actual size of vring (in bytes) */
size = PAGE_ALIGN(vring_size(rvring->len, rvring->align));
size = PAGE_ALIGN(vring_size(rvring->num, rvring->align));
rsc = (void *)rproc->table_ptr + rvdev->rsc_offset;
......@@ -402,7 +402,7 @@ rproc_parse_vring(struct rproc_vdev *rvdev, struct fw_rsc_vdev *rsc, int i)
return -EINVAL;
}
rvring->len = vring->num;
rvring->num = vring->num;
rvring->align = vring->align;
rvring->rvdev = rvdev;
......
......@@ -87,7 +87,7 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
struct fw_rsc_vdev *rsc;
struct virtqueue *vq;
void *addr;
int len, size;
int num, size;
/* we're temporarily limited to two virtqueues per rvdev */
if (id >= ARRAY_SIZE(rvdev->vring))
......@@ -104,20 +104,20 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
rvring = &rvdev->vring[id];
addr = mem->va;
len = rvring->len;
num = rvring->num;
/* zero vring */
size = vring_size(len, rvring->align);
size = vring_size(num, rvring->align);
memset(addr, 0, size);
dev_dbg(dev, "vring%d: va %pK qsz %d notifyid %d\n",
id, addr, len, rvring->notifyid);
id, addr, num, rvring->notifyid);
/*
* Create the new vq, and tell virtio we're not interested in
* the 'weak' smp barriers, since we're talking with a real device.
*/
vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, ctx,
vq = vring_new_virtqueue(id, num, rvring->align, vdev, false, ctx,
addr, rproc_virtio_notify, callback, name);
if (!vq) {
dev_err(dev, "vring_new_virtqueue %s failed\n", name);
......@@ -125,6 +125,8 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
return ERR_PTR(-ENOMEM);
}
vq->num_max = num;
rvring->vq = vq;
vq->priv = rvring;
......@@ -156,6 +158,7 @@ static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char * const names[],
u32 sizes[],
const bool * ctx,
struct irq_affinity *desc)
{
......
......@@ -532,6 +532,9 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
err = -ENOMEM;
goto out_err;
}
vq->num_max = info->num;
/* it may have been reduced */
info->num = virtqueue_get_vring_size(vq);
......@@ -634,6 +637,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char * const names[],
u32 sizes[],
const bool *ctx,
struct irq_affinity *desc)
{
......
......@@ -29,7 +29,6 @@ u16 ifcvf_set_config_vector(struct ifcvf_hw *hw, int vector)
{
struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
cfg = hw->common_cfg;
vp_iowrite16(vector, &cfg->msix_config);
return vp_ioread16(&cfg->msix_config);
......@@ -128,6 +127,7 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev)
break;
case VIRTIO_PCI_CAP_DEVICE_CFG:
hw->dev_cfg = get_cap_addr(hw, &cap);
hw->cap_dev_config_size = le32_to_cpu(cap.length);
IFCVF_DBG(pdev, "hw->dev_cfg = %p\n", hw->dev_cfg);
break;
}
......@@ -233,15 +233,23 @@ int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features)
u32 ifcvf_get_config_size(struct ifcvf_hw *hw)
{
struct ifcvf_adapter *adapter;
u32 net_config_size = sizeof(struct virtio_net_config);
u32 blk_config_size = sizeof(struct virtio_blk_config);
u32 cap_size = hw->cap_dev_config_size;
u32 config_size;
adapter = vf_to_adapter(hw);
/* If the onboard device config space size is greater than
* the size of struct virtio_net/blk_config, only the spec
* implementing contents size is returned, this is very
* unlikely, defensive programming.
*/
switch (hw->dev_type) {
case VIRTIO_ID_NET:
config_size = sizeof(struct virtio_net_config);
config_size = min(cap_size, net_config_size);
break;
case VIRTIO_ID_BLOCK:
config_size = sizeof(struct virtio_blk_config);
config_size = min(cap_size, blk_config_size);
break;
default:
config_size = 0;
......
......@@ -87,6 +87,8 @@ struct ifcvf_hw {
int config_irq;
int vqs_reused_irq;
u16 nr_vring;
/* VIRTIO_PCI_CAP_DEVICE_CFG size */
u32 cap_dev_config_size;
};
struct ifcvf_adapter {
......
......@@ -685,7 +685,7 @@ static struct vdpa_notification_area ifcvf_get_vq_notification(struct vdpa_devic
}
/*
* IFCVF currently does't have on-chip IOMMU, so not
* IFCVF currently doesn't have on-chip IOMMU, so not
* implemented set_map()/dma_map()/dma_unmap()
*/
static const struct vdpa_config_ops ifc_vdpa_ops = {
......@@ -752,59 +752,36 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
{
struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
struct ifcvf_adapter *adapter;
struct vdpa_device *vdpa_dev;
struct pci_dev *pdev;
struct ifcvf_hw *vf;
struct device *dev;
int ret, i;
int ret;
ifcvf_mgmt_dev = container_of(mdev, struct ifcvf_vdpa_mgmt_dev, mdev);
if (ifcvf_mgmt_dev->adapter)
if (!ifcvf_mgmt_dev->adapter)
return -EOPNOTSUPP;
pdev = ifcvf_mgmt_dev->pdev;
dev = &pdev->dev;
adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
dev, &ifc_vdpa_ops, 1, 1, name, false);
if (IS_ERR(adapter)) {
IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
return PTR_ERR(adapter);
}
ifcvf_mgmt_dev->adapter = adapter;
adapter = ifcvf_mgmt_dev->adapter;
vf = &adapter->vf;
vf->dev_type = get_dev_type(pdev);
vf->base = pcim_iomap_table(pdev);
pdev = adapter->pdev;
vdpa_dev = &adapter->vdpa;
adapter->pdev = pdev;
adapter->vdpa.dma_dev = &pdev->dev;
ret = ifcvf_init_hw(vf, pdev);
if (ret) {
IFCVF_ERR(pdev, "Failed to init IFCVF hw\n");
goto err;
}
for (i = 0; i < vf->nr_vring; i++)
vf->vring[i].irq = -EINVAL;
vf->hw_features = ifcvf_get_hw_features(vf);
vf->config_size = ifcvf_get_config_size(vf);
if (name)
ret = dev_set_name(&vdpa_dev->dev, "%s", name);
else
ret = dev_set_name(&vdpa_dev->dev, "vdpa%u", vdpa_dev->index);
adapter->vdpa.mdev = &ifcvf_mgmt_dev->mdev;
ret = _vdpa_register_device(&adapter->vdpa, vf->nr_vring);
if (ret) {
put_device(&adapter->vdpa.dev);
IFCVF_ERR(pdev, "Failed to register to vDPA bus");
goto err;
return ret;
}
return 0;
err:
put_device(&adapter->vdpa.dev);
return ret;
}
static void ifcvf_vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
......@@ -823,61 +800,94 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
struct device *dev = &pdev->dev;
struct ifcvf_adapter *adapter;
struct ifcvf_hw *vf;
u32 dev_type;
int ret;
ifcvf_mgmt_dev = kzalloc(sizeof(struct ifcvf_vdpa_mgmt_dev), GFP_KERNEL);
if (!ifcvf_mgmt_dev) {
IFCVF_ERR(pdev, "Failed to alloc memory for the vDPA management device\n");
return -ENOMEM;
}
dev_type = get_dev_type(pdev);
switch (dev_type) {
case VIRTIO_ID_NET:
ifcvf_mgmt_dev->mdev.id_table = id_table_net;
break;
case VIRTIO_ID_BLOCK:
ifcvf_mgmt_dev->mdev.id_table = id_table_blk;
break;
default:
IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", dev_type);
ret = -EOPNOTSUPP;
goto err;
}
ifcvf_mgmt_dev->mdev.ops = &ifcvf_vdpa_mgmt_dev_ops;
ifcvf_mgmt_dev->mdev.device = dev;
ifcvf_mgmt_dev->pdev = pdev;
int ret, i;
ret = pcim_enable_device(pdev);
if (ret) {
IFCVF_ERR(pdev, "Failed to enable device\n");
goto err;
return ret;
}
ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4),
IFCVF_DRIVER_NAME);
if (ret) {
IFCVF_ERR(pdev, "Failed to request MMIO region\n");
goto err;
return ret;
}
ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
if (ret) {
IFCVF_ERR(pdev, "No usable DMA configuration\n");
goto err;
return ret;
}
ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev);
if (ret) {
IFCVF_ERR(pdev,
"Failed for adding devres for freeing irq vectors\n");
goto err;
return ret;
}
pci_set_master(pdev);
adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
dev, &ifc_vdpa_ops, 1, 1, NULL, false);
if (IS_ERR(adapter)) {
IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
return PTR_ERR(adapter);
}
vf = &adapter->vf;
vf->dev_type = get_dev_type(pdev);
vf->base = pcim_iomap_table(pdev);
adapter->pdev = pdev;
adapter->vdpa.dma_dev = &pdev->dev;
ret = ifcvf_init_hw(vf, pdev);
if (ret) {
IFCVF_ERR(pdev, "Failed to init IFCVF hw\n");
return ret;
}
for (i = 0; i < vf->nr_vring; i++)
vf->vring[i].irq = -EINVAL;
vf->hw_features = ifcvf_get_hw_features(vf);
vf->config_size = ifcvf_get_config_size(vf);
ifcvf_mgmt_dev = kzalloc(sizeof(struct ifcvf_vdpa_mgmt_dev), GFP_KERNEL);
if (!ifcvf_mgmt_dev) {
IFCVF_ERR(pdev, "Failed to alloc memory for the vDPA management device\n");
return -ENOMEM;
}
ifcvf_mgmt_dev->mdev.ops = &ifcvf_vdpa_mgmt_dev_ops;
ifcvf_mgmt_dev->mdev.device = dev;
ifcvf_mgmt_dev->adapter = adapter;
dev_type = get_dev_type(pdev);
switch (dev_type) {
case VIRTIO_ID_NET:
ifcvf_mgmt_dev->mdev.id_table = id_table_net;
break;
case VIRTIO_ID_BLOCK:
ifcvf_mgmt_dev->mdev.id_table = id_table_blk;
break;
default:
IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", dev_type);
ret = -EOPNOTSUPP;
goto err;
}
ifcvf_mgmt_dev->mdev.max_supported_vqs = vf->nr_vring;
ifcvf_mgmt_dev->mdev.supported_features = vf->hw_features;
adapter->vdpa.mdev = &ifcvf_mgmt_dev->mdev;
ret = vdpa_mgmtdev_register(&ifcvf_mgmt_dev->mdev);
if (ret) {
IFCVF_ERR(pdev,
......
......@@ -70,6 +70,16 @@ struct mlx5_vdpa_wq_ent {
struct mlx5_vdpa_dev *mvdev;
};
enum {
MLX5_VDPA_DATAVQ_GROUP,
MLX5_VDPA_CVQ_GROUP,
MLX5_VDPA_NUMVQ_GROUPS
};
enum {
MLX5_VDPA_NUM_AS = MLX5_VDPA_NUMVQ_GROUPS
};
struct mlx5_vdpa_dev {
struct vdpa_device vdev;
struct mlx5_core_dev *mdev;
......@@ -85,6 +95,7 @@ struct mlx5_vdpa_dev {
struct mlx5_vdpa_mr mr;
struct mlx5_control_vq cvq;
struct workqueue_struct *wq;
unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS];
};
int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
......
......@@ -164,6 +164,7 @@ struct mlx5_vdpa_net {
bool setup;
u32 cur_num_vqs;
u32 rqt_size;
bool nb_registered;
struct notifier_block nb;
struct vdpa_callback config_cb;
struct mlx5_vdpa_wq_ent cvq_ent;
......@@ -895,6 +896,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
if (err)
goto err_cmd;
mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
kfree(in);
mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
......@@ -922,6 +924,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
return;
}
mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
umems_destroy(ndev, mvq);
}
......@@ -1121,6 +1124,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
return err;
}
static bool is_valid_state_change(int oldstate, int newstate)
{
switch (oldstate) {
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
default:
return false;
}
}
static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
{
int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
......@@ -1130,6 +1147,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
void *in;
int err;
if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
return 0;
if (!is_valid_state_change(mvq->fw_state, state))
return -EINVAL;
in = kzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
......@@ -1440,7 +1463,7 @@ static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
memset(dmac_c, 0xff, ETH_ALEN);
eth_broadcast_addr(dmac_c);
ether_addr_copy(dmac_v, mac);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
if (tagged) {
......@@ -1992,6 +2015,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
struct mlx5_vdpa_virtqueue *mvq;
int err;
if (!mvdev->actual_features)
return;
......@@ -2005,8 +2029,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
}
mvq = &ndev->vqs[idx];
if (!ready)
if (!ready) {
suspend_vq(ndev, mvq);
} else {
err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
if (err) {
mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
ready = false;
}
}
mvq->ready = ready;
}
......@@ -2095,9 +2127,14 @@ static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
return PAGE_SIZE;
}
static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdpa, u16 idx)
static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
{
return 0;
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
if (is_ctrl_vq_idx(mvdev, idx))
return MLX5_VDPA_CVQ_GROUP;
return MLX5_VDPA_DATAVQ_GROUP;
}
enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
......@@ -2511,6 +2548,15 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
up_write(&ndev->reslock);
}
static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
{
int i;
/* default mapping all groups are mapped to asid 0 */
for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
mvdev->group2asid[i] = 0;
}
static int mlx5_vdpa_reset(struct vdpa_device *vdev)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
......@@ -2529,7 +2575,9 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
ndev->mvdev.cvq.completed_desc = 0;
memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
ndev->mvdev.actual_features = 0;
init_group_to_asid_map(mvdev);
++mvdev->generation;
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
if (mlx5_vdpa_create_mr(mvdev, NULL))
mlx5_vdpa_warn(mvdev, "create MR failed\n");
......@@ -2567,26 +2615,63 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
return mvdev->generation;
}
static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
struct vhost_iotlb *iotlb)
static int set_map_control(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
u64 start = 0ULL, last = 0ULL - 1;
struct vhost_iotlb_map *map;
int err = 0;
spin_lock(&mvdev->cvq.iommu_lock);
vhost_iotlb_reset(mvdev->cvq.iotlb);
for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
map = vhost_iotlb_itree_next(map, start, last)) {
err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start,
map->last, map->addr, map->perm);
if (err)
goto out;
}
out:
spin_unlock(&mvdev->cvq.iommu_lock);
return err;
}
static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
bool change_map;
int err;
down_write(&ndev->reslock);
err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
if (err) {
mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
goto err;
return err;
}
if (change_map)
err = mlx5_vdpa_change_map(mvdev, iotlb);
err:
return err;
}
static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
struct vhost_iotlb *iotlb)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
int err = -EINVAL;
down_write(&ndev->reslock);
if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
err = set_map_data(mvdev, iotlb);
if (err)
goto out;
}
if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid)
err = set_map_control(mvdev, iotlb);
out:
up_write(&ndev->reslock);
return err;
}
......@@ -2733,6 +2818,49 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
return err;
}
static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
{
struct mlx5_control_vq *cvq;
if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
return;
cvq = &mvdev->cvq;
cvq->ready = false;
}
static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
struct mlx5_vdpa_virtqueue *mvq;
int i;
down_write(&ndev->reslock);
mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
ndev->nb_registered = false;
flush_workqueue(ndev->mvdev.wq);
for (i = 0; i < ndev->cur_num_vqs; i++) {
mvq = &ndev->vqs[i];
suspend_vq(ndev, mvq);
}
mlx5_vdpa_cvq_suspend(mvdev);
up_write(&ndev->reslock);
return 0;
}
static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
unsigned int asid)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
if (group >= MLX5_VDPA_NUMVQ_GROUPS)
return -EINVAL;
mvdev->group2asid[group] = asid;
return 0;
}
static const struct vdpa_config_ops mlx5_vdpa_ops = {
.set_vq_address = mlx5_vdpa_set_vq_address,
.set_vq_num = mlx5_vdpa_set_vq_num,
......@@ -2762,7 +2890,9 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
.set_config = mlx5_vdpa_set_config,
.get_generation = mlx5_vdpa_get_generation,
.set_map = mlx5_vdpa_set_map,
.set_group_asid = mlx5_set_group_asid,
.free = mlx5_vdpa_free,
.suspend = mlx5_vdpa_suspend,
};
static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
......@@ -2828,6 +2958,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
mvq->index = i;
mvq->ndev = ndev;
mvq->fwqp.fw = true;
mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
}
for (; i < ndev->mvdev.max_vqs; i++) {
mvq = &ndev->vqs[i];
......@@ -2902,13 +3033,21 @@ static int event_handler(struct notifier_block *nb, unsigned long event, void *p
switch (eqe->sub_type) {
case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
down_read(&ndev->reslock);
if (!ndev->nb_registered) {
up_read(&ndev->reslock);
return NOTIFY_DONE;
}
wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
if (!wqent)
if (!wqent) {
up_read(&ndev->reslock);
return NOTIFY_DONE;
}
wqent->mvdev = &ndev->mvdev;
INIT_WORK(&wqent->work, update_carrier);
queue_work(ndev->mvdev.wq, &wqent->work);
up_read(&ndev->reslock);
ret = NOTIFY_OK;
break;
default:
......@@ -2982,7 +3121,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
}
ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
1, 1, name, false);
MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
if (IS_ERR(ndev))
return PTR_ERR(ndev);
......@@ -3062,6 +3201,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
ndev->nb.notifier_call = event_handler;
mlx5_notifier_register(mdev, &ndev->nb);
ndev->nb_registered = true;
mvdev->vdev.mdev = &mgtdev->mgtdev;
err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
if (err)
......@@ -3093,7 +3233,10 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
struct workqueue_struct *wq;
mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
if (ndev->nb_registered) {
mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
ndev->nb_registered = false;
}
wq = mvdev->wq;
mvdev->wq = NULL;
destroy_workqueue(wq);
......
......@@ -824,11 +824,11 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms
config.mac))
return -EMSGSIZE;
val_u16 = le16_to_cpu(config.status);
val_u16 = __virtio16_to_cpu(true, config.status);
if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16))
return -EMSGSIZE;
val_u16 = le16_to_cpu(config.mtu);
val_u16 = __virtio16_to_cpu(true, config.mtu);
if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16))
return -EMSGSIZE;
......@@ -846,17 +846,9 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid,
{
u32 device_id;
void *hdr;
u8 status;
int err;
down_read(&vdev->cf_lock);
status = vdev->config->get_status(vdev);
if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
NL_SET_ERR_MSG_MOD(extack, "Features negotiation not completed");
err = -EAGAIN;
goto out;
}
hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
VDPA_CMD_DEV_CONFIG_GET);
if (!hdr) {
......@@ -913,7 +905,7 @@ static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg,
}
vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config));
max_vqp = le16_to_cpu(config.max_virtqueue_pairs);
max_vqp = __virtio16_to_cpu(true, config.max_virtqueue_pairs);
if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, max_vqp))
return -EMSGSIZE;
......
......@@ -33,7 +33,7 @@ MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable");
static int max_iotlb_entries = 2048;
module_param(max_iotlb_entries, int, 0444);
MODULE_PARM_DESC(max_iotlb_entries,
"Maximum number of iotlb entries. 0 means unlimited. (default: 2048)");
"Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
#define VDPASIM_QUEUE_ALIGN PAGE_SIZE
#define VDPASIM_QUEUE_MAX 256
......@@ -107,6 +107,7 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim)
for (i = 0; i < vdpasim->dev_attr.nas; i++)
vhost_iotlb_reset(&vdpasim->iommu[i]);
vdpasim->running = true;
spin_unlock(&vdpasim->iommu_lock);
vdpasim->features = 0;
......@@ -291,7 +292,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
goto err_iommu;
for (i = 0; i < vdpasim->dev_attr.nas; i++)
vhost_iotlb_init(&vdpasim->iommu[i], 0, 0);
vhost_iotlb_init(&vdpasim->iommu[i], max_iotlb_entries, 0);
vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL);
if (!vdpasim->buffer)
......@@ -505,6 +506,17 @@ static int vdpasim_reset(struct vdpa_device *vdpa)
return 0;
}
static int vdpasim_suspend(struct vdpa_device *vdpa)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
spin_lock(&vdpasim->lock);
vdpasim->running = false;
spin_unlock(&vdpasim->lock);
return 0;
}
static size_t vdpasim_get_config_size(struct vdpa_device *vdpa)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
......@@ -694,6 +706,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
.get_status = vdpasim_get_status,
.set_status = vdpasim_set_status,
.reset = vdpasim_reset,
.suspend = vdpasim_suspend,
.get_config_size = vdpasim_get_config_size,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
......@@ -726,6 +739,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
.get_status = vdpasim_get_status,
.set_status = vdpasim_set_status,
.reset = vdpasim_reset,
.suspend = vdpasim_suspend,
.get_config_size = vdpasim_get_config_size,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
......
......@@ -66,6 +66,7 @@ struct vdpasim {
u32 generation;
u64 features;
u32 groups;
bool running;
/* spinlock to synchronize iommu table */
spinlock_t iommu_lock;
};
......
This diff is collapsed.
......@@ -154,6 +154,9 @@ static void vdpasim_net_work(struct work_struct *work)
spin_lock(&vdpasim->lock);
if (!vdpasim->running)
goto out;
if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
goto out;
......
......@@ -138,18 +138,17 @@ static void do_bounce(phys_addr_t orig, void *addr, size_t size,
{
unsigned long pfn = PFN_DOWN(orig);
unsigned int offset = offset_in_page(orig);
char *buffer;
struct page *page;
unsigned int sz = 0;
while (size) {
sz = min_t(size_t, PAGE_SIZE - offset, size);
buffer = kmap_atomic(pfn_to_page(pfn));
page = pfn_to_page(pfn);
if (dir == DMA_TO_DEVICE)
memcpy(addr, buffer + offset, sz);
memcpy_from_page(addr, page, offset, sz);
else
memcpy(buffer + offset, addr, sz);
kunmap_atomic(buffer);
memcpy_to_page(page, offset, addr, sz);
size -= sz;
pfn++;
......@@ -179,8 +178,9 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain,
map->orig_phys == INVALID_PHYS_ADDR))
return;
addr = page_address(map->bounce_page) + offset;
do_bounce(map->orig_phys + offset, addr, sz, dir);
addr = kmap_local_page(map->bounce_page);
do_bounce(map->orig_phys + offset, addr + offset, sz, dir);
kunmap_local(addr);
size -= sz;
iova += sz;
}
......@@ -213,21 +213,21 @@ vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
struct vduse_bounce_map *map;
struct page *page = NULL;
spin_lock(&domain->iotlb_lock);
read_lock(&domain->bounce_lock);
map = &domain->bounce_maps[iova >> PAGE_SHIFT];
if (!map->bounce_page)
if (domain->user_bounce_pages || !map->bounce_page)
goto out;
page = map->bounce_page;
get_page(page);
out:
spin_unlock(&domain->iotlb_lock);
read_unlock(&domain->bounce_lock);
return page;
}
static void
vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain)
vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
{
struct vduse_bounce_map *map;
unsigned long pfn, bounce_pfns;
......@@ -247,6 +247,73 @@ vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain)
}
}
int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
struct page **pages, int count)
{
struct vduse_bounce_map *map;
int i, ret;
/* Now we don't support partial mapping */
if (count != (domain->bounce_size >> PAGE_SHIFT))
return -EINVAL;
write_lock(&domain->bounce_lock);
ret = -EEXIST;
if (domain->user_bounce_pages)
goto out;
for (i = 0; i < count; i++) {
map = &domain->bounce_maps[i];
if (map->bounce_page) {
/* Copy kernel page to user page if it's in use */
if (map->orig_phys != INVALID_PHYS_ADDR)
memcpy_to_page(pages[i], 0,
page_address(map->bounce_page),
PAGE_SIZE);
__free_page(map->bounce_page);
}
map->bounce_page = pages[i];
get_page(pages[i]);
}
domain->user_bounce_pages = true;
ret = 0;
out:
write_unlock(&domain->bounce_lock);
return ret;
}
void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
{
struct vduse_bounce_map *map;
unsigned long i, count;
write_lock(&domain->bounce_lock);
if (!domain->user_bounce_pages)
goto out;
count = domain->bounce_size >> PAGE_SHIFT;
for (i = 0; i < count; i++) {
struct page *page = NULL;
map = &domain->bounce_maps[i];
if (WARN_ON(!map->bounce_page))
continue;
/* Copy user page to kernel page if it's in use */
if (map->orig_phys != INVALID_PHYS_ADDR) {
page = alloc_page(GFP_ATOMIC | __GFP_NOFAIL);
memcpy_from_page(page_address(page),
map->bounce_page, 0, PAGE_SIZE);
}
put_page(map->bounce_page);
map->bounce_page = page;
}
domain->user_bounce_pages = false;
out:
write_unlock(&domain->bounce_lock);
}
void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
{
if (!domain->bounce_map)
......@@ -322,13 +389,18 @@ dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
if (vduse_domain_init_bounce_map(domain))
goto err;
read_lock(&domain->bounce_lock);
if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
goto err;
goto err_unlock;
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);
read_unlock(&domain->bounce_lock);
return iova;
err_unlock:
read_unlock(&domain->bounce_lock);
err:
vduse_domain_free_iova(iovad, iova, size);
return DMA_MAPPING_ERROR;
......@@ -340,10 +412,12 @@ void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
{
struct iova_domain *iovad = &domain->stream_iovad;
read_lock(&domain->bounce_lock);
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);
vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
read_unlock(&domain->bounce_lock);
vduse_domain_free_iova(iovad, dma_addr, size);
}
......@@ -451,7 +525,8 @@ static int vduse_domain_release(struct inode *inode, struct file *file)
spin_lock(&domain->iotlb_lock);
vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
vduse_domain_free_bounce_pages(domain);
vduse_domain_remove_user_bounce_pages(domain);
vduse_domain_free_kernel_bounce_pages(domain);
spin_unlock(&domain->iotlb_lock);
put_iova_domain(&domain->stream_iovad);
put_iova_domain(&domain->consistent_iovad);
......@@ -511,6 +586,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
goto err_file;
domain->file = file;
rwlock_init(&domain->bounce_lock);
spin_lock_init(&domain->iotlb_lock);
init_iova_domain(&domain->stream_iovad,
PAGE_SIZE, IOVA_START_PFN);
......
......@@ -14,6 +14,7 @@
#include <linux/iova.h>
#include <linux/dma-mapping.h>
#include <linux/vhost_iotlb.h>
#include <linux/rwlock.h>
#define IOVA_START_PFN 1
......@@ -34,6 +35,8 @@ struct vduse_iova_domain {
struct vhost_iotlb *iotlb;
spinlock_t iotlb_lock;
struct file *file;
bool user_bounce_pages;
rwlock_t bounce_lock;
};
int vduse_domain_set_map(struct vduse_iova_domain *domain,
......@@ -61,6 +64,11 @@ void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain);
int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
struct page **pages, int count);
void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain);
void vduse_domain_destroy(struct vduse_iova_domain *domain);
struct vduse_iova_domain *vduse_domain_create(unsigned long iova_limit,
......
......@@ -21,6 +21,8 @@
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
......@@ -64,6 +66,13 @@ struct vduse_vdpa {
struct vduse_dev *dev;
};
struct vduse_umem {
unsigned long iova;
unsigned long npages;
struct page **pages;
struct mm_struct *mm;
};
struct vduse_dev {
struct vduse_vdpa *vdev;
struct device *dev;
......@@ -95,6 +104,8 @@ struct vduse_dev {
u8 status;
u32 vq_num;
u32 vq_align;
struct vduse_umem *umem;
struct mutex mem_lock;
};
struct vduse_dev_msg {
......@@ -917,6 +928,102 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
return ret;
}
static int vduse_dev_dereg_umem(struct vduse_dev *dev,
u64 iova, u64 size)
{
int ret;
mutex_lock(&dev->mem_lock);
ret = -ENOENT;
if (!dev->umem)
goto unlock;
ret = -EINVAL;
if (dev->umem->iova != iova || size != dev->domain->bounce_size)
goto unlock;
vduse_domain_remove_user_bounce_pages(dev->domain);
unpin_user_pages_dirty_lock(dev->umem->pages,
dev->umem->npages, true);
atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
mmdrop(dev->umem->mm);
vfree(dev->umem->pages);
kfree(dev->umem);
dev->umem = NULL;
ret = 0;
unlock:
mutex_unlock(&dev->mem_lock);
return ret;
}
static int vduse_dev_reg_umem(struct vduse_dev *dev,
u64 iova, u64 uaddr, u64 size)
{
struct page **page_list = NULL;
struct vduse_umem *umem = NULL;
long pinned = 0;
unsigned long npages, lock_limit;
int ret;
if (!dev->domain->bounce_map ||
size != dev->domain->bounce_size ||
iova != 0 || uaddr & ~PAGE_MASK)
return -EINVAL;
mutex_lock(&dev->mem_lock);
ret = -EEXIST;
if (dev->umem)
goto unlock;
ret = -ENOMEM;
npages = size >> PAGE_SHIFT;
page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
GFP_KERNEL_ACCOUNT);
umem = kzalloc(sizeof(*umem), GFP_KERNEL);
if (!page_list || !umem)
goto unlock;
mmap_read_lock(current->mm);
lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
goto out;
pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
page_list, NULL);
if (pinned != npages) {
ret = pinned < 0 ? pinned : -ENOMEM;
goto out;
}
ret = vduse_domain_add_user_bounce_pages(dev->domain,
page_list, pinned);
if (ret)
goto out;
atomic64_add(npages, &current->mm->pinned_vm);
umem->pages = page_list;
umem->npages = pinned;
umem->iova = iova;
umem->mm = current->mm;
mmgrab(current->mm);
dev->umem = umem;
out:
if (ret && pinned > 0)
unpin_user_pages(page_list, pinned);
mmap_read_unlock(current->mm);
unlock:
if (ret) {
vfree(page_list);
kfree(umem);
}
mutex_unlock(&dev->mem_lock);
return ret;
}
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
......@@ -1089,6 +1196,77 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
break;
}
case VDUSE_IOTLB_REG_UMEM: {
struct vduse_iova_umem umem;
ret = -EFAULT;
if (copy_from_user(&umem, argp, sizeof(umem)))
break;
ret = -EINVAL;
if (!is_mem_zero((const char *)umem.reserved,
sizeof(umem.reserved)))
break;
ret = vduse_dev_reg_umem(dev, umem.iova,
umem.uaddr, umem.size);
break;
}
case VDUSE_IOTLB_DEREG_UMEM: {
struct vduse_iova_umem umem;
ret = -EFAULT;
if (copy_from_user(&umem, argp, sizeof(umem)))
break;
ret = -EINVAL;
if (!is_mem_zero((const char *)umem.reserved,
sizeof(umem.reserved)))
break;
ret = vduse_dev_dereg_umem(dev, umem.iova,
umem.size);
break;
}
case VDUSE_IOTLB_GET_INFO: {
struct vduse_iova_info info;
struct vhost_iotlb_map *map;
struct vduse_iova_domain *domain = dev->domain;
ret = -EFAULT;
if (copy_from_user(&info, argp, sizeof(info)))
break;
ret = -EINVAL;
if (info.start > info.last)
break;
if (!is_mem_zero((const char *)info.reserved,
sizeof(info.reserved)))
break;
spin_lock(&domain->iotlb_lock);
map = vhost_iotlb_itree_first(domain->iotlb,
info.start, info.last);
if (map) {
info.start = map->start;
info.last = map->last;
info.capability = 0;
if (domain->bounce_map && map->start == 0 &&
map->last == domain->bounce_size - 1)
info.capability |= VDUSE_IOVA_CAP_UMEM;
}
spin_unlock(&domain->iotlb_lock);
if (!map)
break;
ret = -EFAULT;
if (copy_to_user(argp, &info, sizeof(info)))
break;
ret = 0;
break;
}
default:
ret = -ENOIOCTLCMD;
break;
......@@ -1101,6 +1279,7 @@ static int vduse_dev_release(struct inode *inode, struct file *file)
{
struct vduse_dev *dev = file->private_data;
vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
spin_lock(&dev->msg_lock);
/* Make sure the inflight messages can processed after reconncection */
list_splice_init(&dev->recv_list, &dev->send_list);
......@@ -1163,6 +1342,7 @@ static struct vduse_dev *vduse_dev_create(void)
return NULL;
mutex_init(&dev->lock);
mutex_init(&dev->mem_lock);
spin_lock_init(&dev->msg_lock);
INIT_LIST_HEAD(&dev->send_list);
INIT_LIST_HEAD(&dev->recv_list);
......
......@@ -159,9 +159,13 @@ enum {
};
#define VHOST_SCSI_MAX_TARGET 256
#define VHOST_SCSI_MAX_VQ 128
#define VHOST_SCSI_MAX_IO_VQ 1024
#define VHOST_SCSI_MAX_EVENT 128
static unsigned vhost_scsi_max_io_vqs = 128;
module_param_named(max_io_vqs, vhost_scsi_max_io_vqs, uint, 0644);
MODULE_PARM_DESC(max_io_vqs, "Set the max number of IO virtqueues a vhost scsi device can support. The default is 128. The max is 1024.");
struct vhost_scsi_virtqueue {
struct vhost_virtqueue vq;
/*
......@@ -186,7 +190,9 @@ struct vhost_scsi {
char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
struct vhost_dev dev;
struct vhost_scsi_virtqueue vqs[VHOST_SCSI_MAX_VQ];
struct vhost_scsi_virtqueue *vqs;
unsigned long *compl_bitmap;
struct vhost_scsi_inflight **old_inflight;
struct vhost_work vs_completion_work; /* cmd completion work item */
struct llist_head vs_completion_list; /* cmd completion queue */
......@@ -245,7 +251,7 @@ static void vhost_scsi_init_inflight(struct vhost_scsi *vs,
struct vhost_virtqueue *vq;
int idx, i;
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex);
......@@ -533,7 +539,6 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
{
struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
vs_completion_work);
DECLARE_BITMAP(signal, VHOST_SCSI_MAX_VQ);
struct virtio_scsi_cmd_resp v_rsp;
struct vhost_scsi_cmd *cmd, *t;
struct llist_node *llnode;
......@@ -541,7 +546,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
struct iov_iter iov_iter;
int ret, vq;
bitmap_zero(signal, VHOST_SCSI_MAX_VQ);
bitmap_zero(vs->compl_bitmap, vs->dev.nvqs);
llnode = llist_del_all(&vs->vs_completion_list);
llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) {
se_cmd = &cmd->tvc_se_cmd;
......@@ -566,7 +571,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0);
q = container_of(cmd->tvc_vq, struct vhost_scsi_virtqueue, vq);
vq = q - vs->vqs;
__set_bit(vq, signal);
__set_bit(vq, vs->compl_bitmap);
} else
pr_err("Faulted on virtio_scsi_cmd_resp\n");
......@@ -574,8 +579,8 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
}
vq = -1;
while ((vq = find_next_bit(signal, VHOST_SCSI_MAX_VQ, vq + 1))
< VHOST_SCSI_MAX_VQ)
while ((vq = find_next_bit(vs->compl_bitmap, vs->dev.nvqs, vq + 1))
< vs->dev.nvqs)
vhost_signal(&vs->dev, &vs->vqs[vq].vq);
}
......@@ -1419,26 +1424,25 @@ static void vhost_scsi_handle_kick(struct vhost_work *work)
/* Callers must hold dev mutex */
static void vhost_scsi_flush(struct vhost_scsi *vs)
{
struct vhost_scsi_inflight *old_inflight[VHOST_SCSI_MAX_VQ];
int i;
/* Init new inflight and remember the old inflight */
vhost_scsi_init_inflight(vs, old_inflight);
vhost_scsi_init_inflight(vs, vs->old_inflight);
/*
* The inflight->kref was initialized to 1. We decrement it here to
* indicate the start of the flush operation so that it will reach 0
* when all the reqs are finished.
*/
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
kref_put(&old_inflight[i]->kref, vhost_scsi_done_inflight);
for (i = 0; i < vs->dev.nvqs; i++)
kref_put(&vs->old_inflight[i]->kref, vhost_scsi_done_inflight);
/* Flush both the vhost poll and vhost work */
vhost_dev_flush(&vs->dev);
/* Wait for all reqs issued before the flush to be finished */
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
wait_for_completion(&old_inflight[i]->comp);
for (i = 0; i < vs->dev.nvqs; i++)
wait_for_completion(&vs->old_inflight[i]->comp);
}
static void vhost_scsi_destroy_vq_cmds(struct vhost_virtqueue *vq)
......@@ -1601,7 +1605,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn,
sizeof(vs->vs_vhost_wwpn));
for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) {
for (i = VHOST_SCSI_VQ_IO; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq;
if (!vhost_vq_is_setup(vq))
continue;
......@@ -1611,7 +1615,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
goto destroy_vq_cmds;
}
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex);
vhost_vq_set_backend(vq, vs_tpg);
......@@ -1713,7 +1717,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
target_undepend_item(&se_tpg->tpg_group.cg_item);
}
if (match) {
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex);
vhost_vq_set_backend(vq, NULL);
......@@ -1722,7 +1726,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
/* Make sure cmds are not running before tearing them down. */
vhost_scsi_flush(vs);
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq;
vhost_scsi_destroy_vq_cmds(vq);
}
......@@ -1762,7 +1766,7 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
return -EFAULT;
}
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex);
vq->acked_features = features;
......@@ -1776,16 +1780,40 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
{
struct vhost_scsi *vs;
struct vhost_virtqueue **vqs;
int r = -ENOMEM, i;
int r = -ENOMEM, i, nvqs = vhost_scsi_max_io_vqs;
vs = kvzalloc(sizeof(*vs), GFP_KERNEL);
if (!vs)
goto err_vs;
vqs = kmalloc_array(VHOST_SCSI_MAX_VQ, sizeof(*vqs), GFP_KERNEL);
if (!vqs)
if (nvqs > VHOST_SCSI_MAX_IO_VQ) {
pr_err("Invalid max_io_vqs of %d. Using %d.\n", nvqs,
VHOST_SCSI_MAX_IO_VQ);
nvqs = VHOST_SCSI_MAX_IO_VQ;
} else if (nvqs == 0) {
pr_err("Invalid max_io_vqs of %d. Using 1.\n", nvqs);
nvqs = 1;
}
nvqs += VHOST_SCSI_VQ_IO;
vs->compl_bitmap = bitmap_alloc(nvqs, GFP_KERNEL);
if (!vs->compl_bitmap)
goto err_compl_bitmap;
vs->old_inflight = kmalloc_array(nvqs, sizeof(*vs->old_inflight),
GFP_KERNEL | __GFP_ZERO);
if (!vs->old_inflight)
goto err_inflight;
vs->vqs = kmalloc_array(nvqs, sizeof(*vs->vqs),
GFP_KERNEL | __GFP_ZERO);
if (!vs->vqs)
goto err_vqs;
vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
if (!vqs)
goto err_local_vqs;
vhost_work_init(&vs->vs_completion_work, vhost_scsi_complete_cmd_work);
vhost_work_init(&vs->vs_event_work, vhost_scsi_evt_work);
......@@ -1796,11 +1824,11 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
vqs[VHOST_SCSI_VQ_EVT] = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
vs->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick;
vs->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick;
for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) {
for (i = VHOST_SCSI_VQ_IO; i < nvqs; i++) {
vqs[i] = &vs->vqs[i].vq;
vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
}
vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV,
vhost_dev_init(&vs->dev, vqs, nvqs, UIO_MAXIOV,
VHOST_SCSI_WEIGHT, 0, true, NULL);
vhost_scsi_init_inflight(vs, NULL);
......@@ -1808,7 +1836,13 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
f->private_data = vs;
return 0;
err_local_vqs:
kfree(vs->vqs);
err_vqs:
kfree(vs->old_inflight);
err_inflight:
bitmap_free(vs->compl_bitmap);
err_compl_bitmap:
kvfree(vs);
err_vs:
return r;
......@@ -1826,6 +1860,9 @@ static int vhost_scsi_release(struct inode *inode, struct file *f)
vhost_dev_stop(&vs->dev);
vhost_dev_cleanup(&vs->dev);
kfree(vs->dev.vqs);
kfree(vs->vqs);
kfree(vs->old_inflight);
bitmap_free(vs->compl_bitmap);
kvfree(vs);
return 0;
}
......
......@@ -347,6 +347,14 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v,
return 0;
}
static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
return ops->suspend;
}
static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
struct vdpa_device *vdpa = v->vdpa;
......@@ -470,6 +478,22 @@ static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
return 0;
}
/* After a successful return of ioctl the device must not process more
* virtqueue descriptors. The device can answer to read or writes of config
* fields as if it were not suspended. In particular, writing to "queue_enable"
* with a value of 1 will not make the device start processing buffers.
*/
static long vhost_vdpa_suspend(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
if (!ops->suspend)
return -EOPNOTSUPP;
return ops->suspend(vdpa);
}
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
void __user *argp)
{
......@@ -577,7 +601,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
if (cmd == VHOST_SET_BACKEND_FEATURES) {
if (copy_from_user(&features, featurep, sizeof(features)))
return -EFAULT;
if (features & ~VHOST_VDPA_BACKEND_FEATURES)
if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
BIT_ULL(VHOST_BACKEND_F_SUSPEND)))
return -EOPNOTSUPP;
if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
!vhost_vdpa_can_suspend(v))
return -EOPNOTSUPP;
vhost_set_backend_features(&v->vdev, features);
return 0;
......@@ -628,6 +656,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
break;
case VHOST_GET_BACKEND_FEATURES:
features = VHOST_VDPA_BACKEND_FEATURES;
if (vhost_vdpa_can_suspend(v))
features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
if (copy_to_user(featurep, &features, sizeof(features)))
r = -EFAULT;
break;
......@@ -640,6 +670,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
case VHOST_VDPA_GET_VQS_COUNT:
r = vhost_vdpa_get_vqs_count(v, argp);
break;
case VHOST_VDPA_SUSPEND:
r = vhost_vdpa_suspend(v);
break;
default:
r = vhost_dev_ioctl(&v->vdev, cmd, argp);
if (r == -ENOIOCTLCMD)
......@@ -1076,7 +1109,7 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
if (!bus)
return -EFAULT;
if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY))
return -ENOTSUPP;
v->domain = iommu_domain_alloc(bus);
......@@ -1363,6 +1396,7 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
err:
put_device(&v->dev);
ida_simple_remove(&vhost_vdpa_ida, v->minor);
return r;
}
......
......@@ -1095,7 +1095,8 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
static int iotlb_translate(const struct vringh *vrh,
u64 addr, u64 len, struct bio_vec iov[],
u64 addr, u64 len, u64 *translated,
struct bio_vec iov[],
int iov_size, u32 perm)
{
struct vhost_iotlb_map *map;
......@@ -1136,43 +1137,76 @@ static int iotlb_translate(const struct vringh *vrh,
spin_unlock(vrh->iotlb_lock);
if (translated)
*translated = min(len, s);
return ret;
}
static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
void *src, size_t len)
{
struct iov_iter iter;
struct bio_vec iov[16];
int ret;
u64 total_translated = 0;
ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
len, iov, 16, VHOST_MAP_RO);
if (ret < 0)
return ret;
while (total_translated < len) {
struct bio_vec iov[16];
struct iov_iter iter;
u64 translated;
int ret;
iov_iter_bvec(&iter, READ, iov, ret, len);
ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
len - total_translated, &translated,
iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
if (ret == -ENOBUFS)
ret = ARRAY_SIZE(iov);
else if (ret < 0)
return ret;
ret = copy_from_iter(dst, len, &iter);
iov_iter_bvec(&iter, READ, iov, ret, translated);
return ret;
ret = copy_from_iter(dst, translated, &iter);
if (ret < 0)
return ret;
src += translated;
dst += translated;
total_translated += translated;
}
return total_translated;
}
static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
void *src, size_t len)
{
struct iov_iter iter;
struct bio_vec iov[16];
int ret;
u64 total_translated = 0;
ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
len, iov, 16, VHOST_MAP_WO);
if (ret < 0)
return ret;
while (total_translated < len) {
struct bio_vec iov[16];
struct iov_iter iter;
u64 translated;
int ret;
ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
len - total_translated, &translated,
iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
if (ret == -ENOBUFS)
ret = ARRAY_SIZE(iov);
else if (ret < 0)
return ret;
iov_iter_bvec(&iter, WRITE, iov, ret, len);
iov_iter_bvec(&iter, WRITE, iov, ret, translated);
ret = copy_to_iter(src, translated, &iter);
if (ret < 0)
return ret;
src += translated;
dst += translated;
total_translated += translated;
}
return copy_to_iter(src, len, &iter);
return total_translated;
}
static inline int getu16_iotlb(const struct vringh *vrh,
......@@ -1183,7 +1217,7 @@ static inline int getu16_iotlb(const struct vringh *vrh,
int ret;
/* Atomic read is needed for getu16 */
ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
&iov, 1, VHOST_MAP_RO);
if (ret < 0)
return ret;
......@@ -1204,7 +1238,7 @@ static inline int putu16_iotlb(const struct vringh *vrh,
int ret;
/* Atomic write is needed for putu16 */
ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
&iov, 1, VHOST_MAP_WO);
if (ret < 0)
return ret;
......
......@@ -35,11 +35,12 @@ if VIRTIO_MENU
config VIRTIO_HARDEN_NOTIFICATION
bool "Harden virtio notification"
depends on BROKEN
help
Enable this to harden the device notifications and suppress
those that happen at a time where notifications are illegal.
Experimental: Note that several drivers still have bugs that
Experimental: Note that several drivers still have issues that
may cause crashes or hangs when correct handling of
notifications is enforced; depending on the subset of
drivers and devices you use, this may or may not work.
......@@ -126,9 +127,11 @@ config VIRTIO_MEM
This driver provides access to virtio-mem paravirtualized memory
devices, allowing to hotplug and hotunplug memory.
This driver was only tested under x86-64 and arm64, but should
theoretically work on all architectures that support memory hotplug
and hotremove.
This driver currently only supports x86-64 and arm64. Although it
should compile on other architectures that implement memory
hot(un)plug, architecture-specific and/or common
code changes may be required for virtio-mem, kdump and kexec to work as
expected.
If unsure, say M.
......
......@@ -428,7 +428,9 @@ int register_virtio_device(struct virtio_device *dev)
goto out;
dev->index = err;
dev_set_name(&dev->dev, "virtio%u", dev->index);
err = dev_set_name(&dev->dev, "virtio%u", dev->index);
if (err)
goto out_ida_remove;
err = virtio_device_of_init(dev);
if (err)
......
......@@ -360,7 +360,7 @@ static void vm_synchronize_cbs(struct virtio_device *vdev)
static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int index,
void (*callback)(struct virtqueue *vq),
const char *name, bool ctx)
const char *name, u32 size, bool ctx)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
struct virtio_mmio_vq_info *info;
......@@ -395,14 +395,19 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int in
goto error_new_virtqueue;
}
if (!size || size > num)
size = num;
/* Create the vring */
vq = vring_create_virtqueue(index, num, VIRTIO_MMIO_VRING_ALIGN, vdev,
vq = vring_create_virtqueue(index, size, VIRTIO_MMIO_VRING_ALIGN, vdev,
true, true, ctx, vm_notify, callback, name);
if (!vq) {
err = -ENOMEM;
goto error_new_virtqueue;
}
vq->num_max = num;
/* Activate the queue */
writel(virtqueue_get_vring_size(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NUM);
if (vm_dev->version == 1) {
......@@ -472,6 +477,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char * const names[],
u32 sizes[],
const bool *ctx,
struct irq_affinity *desc)
{
......@@ -487,6 +493,9 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
if (err)
return err;
if (of_property_read_bool(vm_dev->pdev->dev.of_node, "wakeup-source"))
enable_irq_wake(irq);
for (i = 0; i < nvqs; ++i) {
if (!names[i]) {
vqs[i] = NULL;
......@@ -494,6 +503,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
}
vqs[i] = vm_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
sizes ? sizes[i] : 0,
ctx ? ctx[i] : false);
if (IS_ERR(vqs[i])) {
vm_del_vqs(vdev);
......
......@@ -174,6 +174,7 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index,
void (*callback)(struct virtqueue *vq),
const char *name,
u32 size,
bool ctx,
u16 msix_vec)
{
......@@ -186,7 +187,7 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int in
if (!info)
return ERR_PTR(-ENOMEM);
vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,
vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, size, ctx,
msix_vec);
if (IS_ERR(vq))
goto out_info;
......@@ -214,9 +215,15 @@ static void vp_del_vq(struct virtqueue *vq)
struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
unsigned long flags;
spin_lock_irqsave(&vp_dev->lock, flags);
list_del(&info->node);
spin_unlock_irqrestore(&vp_dev->lock, flags);
/*
* If it fails during re-enable reset vq. This way we won't rejoin
* info->node to the queue. Prevent unexpected irqs.
*/
if (!vq->reset) {
spin_lock_irqsave(&vp_dev->lock, flags);
list_del(&info->node);
spin_unlock_irqrestore(&vp_dev->lock, flags);
}
vp_dev->del_vq(info);
kfree(info);
......@@ -277,7 +284,7 @@ void vp_del_vqs(struct virtio_device *vdev)
static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], bool per_vq_vectors,
const char * const names[], u32 sizes[], bool per_vq_vectors,
const bool *ctx,
struct irq_affinity *desc)
{
......@@ -320,8 +327,8 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs,
else
msix_vec = VP_MSIX_VQ_VECTOR;
vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
ctx ? ctx[i] : false,
msix_vec);
sizes ? sizes[i] : 0,
ctx ? ctx[i] : false, msix_vec);
if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]);
goto error_find;
......@@ -351,7 +358,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs,
static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], const bool *ctx)
const char * const names[], u32 sizes[], const bool *ctx)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
int i, err, queue_idx = 0;
......@@ -373,6 +380,7 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs,
continue;
}
vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
sizes ? sizes[i] : 0,
ctx ? ctx[i] : false,
VIRTIO_MSI_NO_VECTOR);
if (IS_ERR(vqs[i])) {
......@@ -390,21 +398,21 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs,
/* the config->find_vqs() implementation */
int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], const bool *ctx,
const char * const names[], u32 sizes[], const bool *ctx,
struct irq_affinity *desc)
{
int err;
/* Try MSI-X with one vector per queue. */
err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc);
err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, sizes, true, ctx, desc);
if (!err)
return 0;
/* Fallback: MSI-X with one vector for config, one shared for queues. */
err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc);
err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, sizes, false, ctx, desc);
if (!err)
return 0;
/* Finally fall back to regular interrupts. */
return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx);
return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, sizes, ctx);
}
const char *vp_bus_name(struct virtio_device *vdev)
......
......@@ -80,6 +80,7 @@ struct virtio_pci_device {
unsigned int idx,
void (*callback)(struct virtqueue *vq),
const char *name,
u32 size,
bool ctx,
u16 msix_vec);
void (*del_vq)(struct virtio_pci_vq_info *info);
......@@ -110,7 +111,7 @@ void vp_del_vqs(struct virtio_device *vdev);
/* the config->find_vqs() implementation */
int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], const bool *ctx,
const char * const names[], u32 sizes[], const bool *ctx,
struct irq_affinity *desc);
const char *vp_bus_name(struct virtio_device *vdev);
......
......@@ -112,6 +112,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
unsigned int index,
void (*callback)(struct virtqueue *vq),
const char *name,
u32 size,
bool ctx,
u16 msix_vec)
{
......@@ -125,16 +126,21 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
if (!num || vp_legacy_get_queue_enable(&vp_dev->ldev, index))
return ERR_PTR(-ENOENT);
if (!size || size > num)
size = num;
info->msix_vector = msix_vec;
/* create the vring */
vq = vring_create_virtqueue(index, num,
vq = vring_create_virtqueue(index, size,
VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev,
true, false, ctx,
vp_notify, callback, name);
if (!vq)
return ERR_PTR(-ENOMEM);
vq->num_max = num;
q_pfn = virtqueue_get_desc_addr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
if (q_pfn >> 32) {
dev_err(&vp_dev->pci_dev->dev,
......
......@@ -34,6 +34,9 @@ static void vp_transport_features(struct virtio_device *vdev, u64 features)
if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) &&
pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV))
__virtio_set_bit(vdev, VIRTIO_F_SR_IOV);
if (features & BIT_ULL(VIRTIO_F_RING_RESET))
__virtio_set_bit(vdev, VIRTIO_F_RING_RESET);
}
/* virtio config->finalize_features() implementation */
......@@ -176,6 +179,110 @@ static void vp_reset(struct virtio_device *vdev)
vp_synchronize_vectors(vdev);
}
static int vp_active_vq(struct virtqueue *vq, u16 msix_vec)
{
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
unsigned long index;
index = vq->index;
/* activate the queue */
vp_modern_set_queue_size(mdev, index, virtqueue_get_vring_size(vq));
vp_modern_queue_address(mdev, index, virtqueue_get_desc_addr(vq),
virtqueue_get_avail_addr(vq),
virtqueue_get_used_addr(vq));
if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
msix_vec = vp_modern_queue_vector(mdev, index, msix_vec);
if (msix_vec == VIRTIO_MSI_NO_VECTOR)
return -EBUSY;
}
return 0;
}
static int vp_modern_disable_vq_and_reset(struct virtqueue *vq)
{
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
struct virtio_pci_vq_info *info;
unsigned long flags;
if (!virtio_has_feature(vq->vdev, VIRTIO_F_RING_RESET))
return -ENOENT;
vp_modern_set_queue_reset(mdev, vq->index);
info = vp_dev->vqs[vq->index];
/* delete vq from irq handler */
spin_lock_irqsave(&vp_dev->lock, flags);
list_del(&info->node);
spin_unlock_irqrestore(&vp_dev->lock, flags);
INIT_LIST_HEAD(&info->node);
#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
__virtqueue_break(vq);
#endif
/* For the case where vq has an exclusive irq, call synchronize_irq() to
* wait for completion.
*
* note: We can't use disable_irq() since it conflicts with the affinity
* managed IRQ that is used by some drivers.
*/
if (vp_dev->per_vq_vectors && info->msix_vector != VIRTIO_MSI_NO_VECTOR)
synchronize_irq(pci_irq_vector(vp_dev->pci_dev, info->msix_vector));
vq->reset = true;
return 0;
}
static int vp_modern_enable_vq_after_reset(struct virtqueue *vq)
{
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
struct virtio_pci_vq_info *info;
unsigned long flags, index;
int err;
if (!vq->reset)
return -EBUSY;
index = vq->index;
info = vp_dev->vqs[index];
if (vp_modern_get_queue_reset(mdev, index))
return -EBUSY;
if (vp_modern_get_queue_enable(mdev, index))
return -EBUSY;
err = vp_active_vq(vq, info->msix_vector);
if (err)
return err;
if (vq->callback) {
spin_lock_irqsave(&vp_dev->lock, flags);
list_add(&info->node, &vp_dev->virtqueues);
spin_unlock_irqrestore(&vp_dev->lock, flags);
} else {
INIT_LIST_HEAD(&info->node);
}
#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
__virtqueue_unbreak(vq);
#endif
vp_modern_set_queue_enable(&vp_dev->mdev, index, true);
vq->reset = false;
return 0;
}
static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
{
return vp_modern_config_vector(&vp_dev->mdev, vector);
......@@ -186,6 +293,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
unsigned int index,
void (*callback)(struct virtqueue *vq),
const char *name,
u32 size,
bool ctx,
u16 msix_vec)
{
......@@ -203,47 +311,39 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
if (!num || vp_modern_get_queue_enable(mdev, index))
return ERR_PTR(-ENOENT);
if (num & (num - 1)) {
dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num);
if (!size || size > num)
size = num;
if (size & (size - 1)) {
dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", size);
return ERR_PTR(-EINVAL);
}
info->msix_vector = msix_vec;
/* create the vring */
vq = vring_create_virtqueue(index, num,
vq = vring_create_virtqueue(index, size,
SMP_CACHE_BYTES, &vp_dev->vdev,
true, true, ctx,
vp_notify, callback, name);
if (!vq)
return ERR_PTR(-ENOMEM);
/* activate the queue */
vp_modern_set_queue_size(mdev, index, virtqueue_get_vring_size(vq));
vp_modern_queue_address(mdev, index, virtqueue_get_desc_addr(vq),
virtqueue_get_avail_addr(vq),
virtqueue_get_used_addr(vq));
vq->num_max = num;
err = vp_active_vq(vq, msix_vec);
if (err)
goto err;
vq->priv = (void __force *)vp_modern_map_vq_notify(mdev, index, NULL);
if (!vq->priv) {
err = -ENOMEM;
goto err_map_notify;
}
if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
msix_vec = vp_modern_queue_vector(mdev, index, msix_vec);
if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
err = -EBUSY;
goto err_assign_vector;
}
goto err;
}
return vq;
err_assign_vector:
if (!mdev->notify_base)
pci_iounmap(mdev->pci_dev, (void __iomem __force *)vq->priv);
err_map_notify:
err:
vring_del_virtqueue(vq);
return ERR_PTR(err);
}
......@@ -251,12 +351,15 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char * const names[], const bool *ctx,
const char * const names[],
u32 sizes[],
const bool *ctx,
struct irq_affinity *desc)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtqueue *vq;
int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc);
int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, sizes, ctx,
desc);
if (rc)
return rc;
......@@ -401,6 +504,8 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
.set_vq_affinity = vp_set_vq_affinity,
.get_vq_affinity = vp_get_vq_affinity,
.get_shm_region = vp_get_shm_region,
.disable_vq_and_reset = vp_modern_disable_vq_and_reset,
.enable_vq_after_reset = vp_modern_enable_vq_after_reset,
};
static const struct virtio_config_ops virtio_pci_config_ops = {
......@@ -419,6 +524,8 @@ static const struct virtio_config_ops virtio_pci_config_ops = {
.set_vq_affinity = vp_set_vq_affinity,
.get_vq_affinity = vp_get_vq_affinity,
.get_shm_region = vp_get_shm_region,
.disable_vq_and_reset = vp_modern_disable_vq_and_reset,
.enable_vq_after_reset = vp_modern_enable_vq_after_reset,
};
/* the PCI probing function */
......
......@@ -3,6 +3,7 @@
#include <linux/virtio_pci_modern.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/delay.h>
/*
* vp_modern_map_capability - map a part of virtio pci capability
......@@ -474,6 +475,44 @@ void vp_modern_set_status(struct virtio_pci_modern_device *mdev,
}
EXPORT_SYMBOL_GPL(vp_modern_set_status);
/*
* vp_modern_get_queue_reset - get the queue reset status
* @mdev: the modern virtio-pci device
* @index: queue index
*/
int vp_modern_get_queue_reset(struct virtio_pci_modern_device *mdev, u16 index)
{
struct virtio_pci_modern_common_cfg __iomem *cfg;
cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev->common;
vp_iowrite16(index, &cfg->cfg.queue_select);
return vp_ioread16(&cfg->queue_reset);
}
EXPORT_SYMBOL_GPL(vp_modern_get_queue_reset);
/*
* vp_modern_set_queue_reset - reset the queue
* @mdev: the modern virtio-pci device
* @index: queue index
*/
void vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16 index)
{
struct virtio_pci_modern_common_cfg __iomem *cfg;
cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev->common;
vp_iowrite16(index, &cfg->cfg.queue_select);
vp_iowrite16(1, &cfg->queue_reset);
while (vp_ioread16(&cfg->queue_reset))
msleep(1);
while (vp_ioread16(&cfg->cfg.queue_enable))
msleep(1);
}
EXPORT_SYMBOL_GPL(vp_modern_set_queue_reset);
/*
* vp_modern_queue_vector - set the MSIX vector for a specific virtqueue
* @mdev: the modern virtio-pci device
......
This diff is collapsed.
......@@ -131,7 +131,7 @@ static irqreturn_t virtio_vdpa_virtqueue_cb(void *private)
static struct virtqueue *
virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
void (*callback)(struct virtqueue *vq),
const char *name, bool ctx)
const char *name, u32 size, bool ctx)
{
struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev);
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
......@@ -168,14 +168,17 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
goto error_new_virtqueue;
}
if (!size || size > max_num)
size = max_num;
if (ops->get_vq_num_min)
min_num = ops->get_vq_num_min(vdpa);
may_reduce_num = (max_num == min_num) ? false : true;
may_reduce_num = (size == min_num) ? false : true;
/* Create the vring */
align = ops->get_vq_align(vdpa);
vq = vring_create_virtqueue(index, max_num, align, vdev,
vq = vring_create_virtqueue(index, size, align, vdev,
true, may_reduce_num, ctx,
virtio_vdpa_notify, callback, name);
if (!vq) {
......@@ -183,6 +186,8 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
goto error_new_virtqueue;
}
vq->num_max = max_num;
/* Setup virtqueue callback */
cb.callback = callback ? virtio_vdpa_virtqueue_cb : NULL;
cb.private = info;
......@@ -267,6 +272,7 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char * const names[],
u32 sizes[],
const bool *ctx,
struct irq_affinity *desc)
{
......@@ -282,9 +288,9 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
continue;
}
vqs[i] = virtio_vdpa_setup_vq(vdev, queue_idx++,
callbacks[i], names[i], ctx ?
ctx[i] : false);
vqs[i] = virtio_vdpa_setup_vq(vdev, queue_idx++, callbacks[i],
names[i], sizes ? sizes[i] : 0,
ctx ? ctx[i] : false);
if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]);
goto err_setup_vq;
......
......@@ -150,6 +150,14 @@ enum {
MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR = 0x3,
};
/* This indicates that the object was not created or has already
* been desroyed. It is very safe to assume that this object will never
* have so many states
*/
enum {
MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
};
enum {
MLX5_RQTC_LIST_Q_TYPE_RQ = 0x0,
MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q = 0x1,
......
......@@ -597,7 +597,7 @@ struct rproc_subdev {
/**
* struct rproc_vring - remoteproc vring state
* @va: virtual address
* @len: length, in bytes
* @num: vring size
* @da: device address
* @align: vring alignment
* @notifyid: rproc-specific unique vring index
......@@ -606,7 +606,7 @@ struct rproc_subdev {
*/
struct rproc_vring {
void *va;
int len;
int num;
u32 da;
u32 align;
int notifyid;
......
......@@ -218,6 +218,9 @@ struct vdpa_map_file {
* @reset: Reset device
* @vdev: vdpa device
* Returns integer: success (0) or error (< 0)
* @suspend: Suspend or resume the device (optional)
* @vdev: vdpa device
* Returns integer: success (0) or error (< 0)
* @get_config_size: Get the size of the configuration space includes
* fields that are conditional on feature bits.
* @vdev: vdpa device
......@@ -319,6 +322,7 @@ struct vdpa_config_ops {
u8 (*get_status)(struct vdpa_device *vdev);
void (*set_status)(struct vdpa_device *vdev, u8 status);
int (*reset)(struct vdpa_device *vdev);
int (*suspend)(struct vdpa_device *vdev);
size_t (*get_config_size)(struct vdpa_device *vdev);
void (*get_config)(struct vdpa_device *vdev, unsigned int offset,
void *buf, unsigned int len);
......
......@@ -19,6 +19,8 @@
* @priv: a pointer for the virtqueue implementation to use.
* @index: the zero-based ordinal number for this queue.
* @num_free: number of elements we expect to be able to fit.
* @num_max: the maximum number of elements supported by the device.
* @reset: vq is in reset state or not.
*
* A note on @num_free: with indirect buffers, each buffer needs one
* element in the queue, otherwise a buffer will need one element per
......@@ -31,7 +33,9 @@ struct virtqueue {
struct virtio_device *vdev;
unsigned int index;
unsigned int num_free;
unsigned int num_max;
void *priv;
bool reset;
};
int virtqueue_add_outbuf(struct virtqueue *vq,
......@@ -89,6 +93,9 @@ dma_addr_t virtqueue_get_desc_addr(struct virtqueue *vq);
dma_addr_t virtqueue_get_avail_addr(struct virtqueue *vq);
dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
int virtqueue_resize(struct virtqueue *vq, u32 num,
void (*recycle)(struct virtqueue *vq, void *buf));
/**
* virtio_device - representation of a device using virtio
* @index: unique position on the virtio bus
......@@ -133,6 +140,9 @@ bool is_virtio_device(struct device *dev);
void virtio_break_device(struct virtio_device *dev);
void __virtio_unbreak_device(struct virtio_device *dev);
void __virtqueue_break(struct virtqueue *_vq);
void __virtqueue_unbreak(struct virtqueue *_vq);
void virtio_config_changed(struct virtio_device *dev);
#ifdef CONFIG_PM_SLEEP
int virtio_device_freeze(struct virtio_device *dev);
......
......@@ -55,6 +55,7 @@ struct virtio_shm_region {
* include a NULL entry for vqs that do not need a callback
* names: array of virtqueue names (mainly for debugging)
* include a NULL entry for vqs unused by driver
* sizes: array of virtqueue sizes
* Returns 0 on success or error status
* @del_vqs: free virtqueues found by find_vqs().
* @synchronize_cbs: synchronize with the virtqueue callbacks (optional)
......@@ -78,6 +79,18 @@ struct virtio_shm_region {
* @set_vq_affinity: set the affinity for a virtqueue (optional).
* @get_vq_affinity: get the affinity for a virtqueue (optional).
* @get_shm_region: get a shared memory region based on the index.
* @disable_vq_and_reset: reset a queue individually (optional).
* vq: the virtqueue
* Returns 0 on success or error status
* disable_vq_and_reset will guarantee that the callbacks are disabled and
* synchronized.
* Except for the callback, the caller should guarantee that the vring is
* not accessed by any functions of virtqueue.
* @enable_vq_after_reset: enable a reset queue
* vq: the virtqueue
* Returns 0 on success or error status
* If disable_vq_and_reset is set, then enable_vq_after_reset must also be
* set.
*/
typedef void vq_callback_t(struct virtqueue *);
struct virtio_config_ops {
......@@ -91,7 +104,9 @@ struct virtio_config_ops {
void (*reset)(struct virtio_device *vdev);
int (*find_vqs)(struct virtio_device *, unsigned nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], const bool *ctx,
const char * const names[],
u32 sizes[],
const bool *ctx,
struct irq_affinity *desc);
void (*del_vqs)(struct virtio_device *);
void (*synchronize_cbs)(struct virtio_device *);
......@@ -104,6 +119,8 @@ struct virtio_config_ops {
int index);
bool (*get_shm_region)(struct virtio_device *vdev,
struct virtio_shm_region *region, u8 id);
int (*disable_vq_and_reset)(struct virtqueue *vq);
int (*enable_vq_after_reset)(struct virtqueue *vq);
};
/* If driver didn't advertise the feature, it will never appear. */
......@@ -198,7 +215,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
const char *names[] = { n };
struct virtqueue *vq;
int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL,
NULL);
NULL, NULL);
if (err < 0)
return ERR_PTR(err);
return vq;
......@@ -210,7 +227,8 @@ int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
const char * const names[],
struct irq_affinity *desc)
{
return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc);
return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL,
NULL, desc);
}
static inline
......@@ -219,8 +237,20 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs,
const char * const names[], const bool *ctx,
struct irq_affinity *desc)
{
return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx,
desc);
return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL,
ctx, desc);
}
static inline
int virtio_find_vqs_ctx_size(struct virtio_device *vdev, u32 nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char * const names[],
u32 sizes[],
const bool *ctx, struct irq_affinity *desc)
{
return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, sizes,
ctx, desc);
}
/**
......
......@@ -5,6 +5,13 @@
#include <linux/pci.h>
#include <linux/virtio_pci.h>
struct virtio_pci_modern_common_cfg {
struct virtio_pci_common_cfg cfg;
__le16 queue_notify_data; /* read-write */
__le16 queue_reset; /* read-write */
};
struct virtio_pci_modern_device {
struct pci_dev *pci_dev;
......@@ -106,4 +113,6 @@ void __iomem * vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev,
u16 index, resource_size_t *pa);
int vp_modern_probe(struct virtio_pci_modern_device *mdev);
void vp_modern_remove(struct virtio_pci_modern_device *mdev);
int vp_modern_get_queue_reset(struct virtio_pci_modern_device *mdev, u16 index);
void vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16 index);
#endif
......@@ -76,16 +76,6 @@ struct virtqueue *vring_create_virtqueue(unsigned int index,
void (*callback)(struct virtqueue *vq),
const char *name);
/* Creates a virtqueue with a custom layout. */
struct virtqueue *__vring_new_virtqueue(unsigned int index,
struct vring vring,
struct virtio_device *vdev,
bool weak_barriers,
bool ctx,
bool (*notify)(struct virtqueue *),
void (*callback)(struct virtqueue *),
const char *name);
/*
* Creates a virtqueue with a standard layout but a caller-allocated
* ring.
......
......@@ -210,6 +210,53 @@ struct vduse_vq_eventfd {
*/
#define VDUSE_VQ_INJECT_IRQ _IOW(VDUSE_BASE, 0x17, __u32)
/**
* struct vduse_iova_umem - userspace memory configuration for one IOVA region
* @uaddr: start address of userspace memory, it must be aligned to page size
* @iova: start of the IOVA region
* @size: size of the IOVA region
* @reserved: for future use, needs to be initialized to zero
*
* Structure used by VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM
* ioctls to register/de-register userspace memory for IOVA regions
*/
struct vduse_iova_umem {
__u64 uaddr;
__u64 iova;
__u64 size;
__u64 reserved[3];
};
/* Register userspace memory for IOVA regions */
#define VDUSE_IOTLB_REG_UMEM _IOW(VDUSE_BASE, 0x18, struct vduse_iova_umem)
/* De-register the userspace memory. Caller should set iova and size field. */
#define VDUSE_IOTLB_DEREG_UMEM _IOW(VDUSE_BASE, 0x19, struct vduse_iova_umem)
/**
* struct vduse_iova_info - information of one IOVA region
* @start: start of the IOVA region
* @last: last of the IOVA region
* @capability: capability of the IOVA regsion
* @reserved: for future use, needs to be initialized to zero
*
* Structure used by VDUSE_IOTLB_GET_INFO ioctl to get information of
* one IOVA region.
*/
struct vduse_iova_info {
__u64 start;
__u64 last;
#define VDUSE_IOVA_CAP_UMEM (1 << 0)
__u64 capability;
__u64 reserved[3];
};
/*
* Find the first IOVA region that overlaps with the range [start, last]
* and return some information on it. Caller should set start and last fields.
*/
#define VDUSE_IOTLB_GET_INFO _IOWR(VDUSE_BASE, 0x1a, struct vduse_iova_info)
/* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */
/**
......
......@@ -171,4 +171,13 @@
#define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \
struct vhost_vring_state)
/* Suspend a device so it does not process virtqueue requests anymore
*
* After the return of ioctl the device must preserve all the necessary state
* (the virtqueue vring base plus the possible device specific states) that is
* required for restoring in the future. The device must not change its
* configuration after that point.
*/
#define VHOST_VDPA_SUSPEND _IO(VHOST_VIRTIO, 0x7D)
#endif
......@@ -161,5 +161,7 @@ struct vhost_vdpa_iova_range {
* message
*/
#define VHOST_BACKEND_F_IOTLB_ASID 0x3
/* Device can be suspended */
#define VHOST_BACKEND_F_SUSPEND 0x4
#endif
......@@ -52,7 +52,7 @@
* rest are per-device feature bits.
*/
#define VIRTIO_TRANSPORT_F_START 28
#define VIRTIO_TRANSPORT_F_END 38
#define VIRTIO_TRANSPORT_F_END 41
#ifndef VIRTIO_CONFIG_NO_LEGACY
/* Do we get callbacks when the ring is completely used, even if we've
......@@ -98,4 +98,9 @@
* Does the device support Single Root I/O Virtualization?
*/
#define VIRTIO_F_SR_IOV 37
/*
* This feature indicates that the driver can reset a queue individually.
*/
#define VIRTIO_F_RING_RESET 40
#endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */
......@@ -56,7 +56,7 @@
#define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow
* Steering */
#define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */
#define VIRTIO_NET_F_NOTF_COAL 53 /* Guest can handle notifications coalescing */
#define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */
#define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */
#define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */
......@@ -355,4 +355,36 @@ struct virtio_net_hash_config {
#define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5
#define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0
/*
* Control notifications coalescing.
*
* Request the device to change the notifications coalescing parameters.
*
* Available with the VIRTIO_NET_F_NOTF_COAL feature bit.
*/
#define VIRTIO_NET_CTRL_NOTF_COAL 6
/*
* Set the tx-usecs/tx-max-packets patameters.
* tx-usecs - Maximum number of usecs to delay a TX notification.
* tx-max-packets - Maximum number of packets to send before a TX notification.
*/
struct virtio_net_ctrl_coal_tx {
__le32 tx_max_packets;
__le32 tx_usecs;
};
#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET 0
/*
* Set the rx-usecs/rx-max-packets patameters.
* rx-usecs - Maximum number of usecs to delay a RX notification.
* rx-max-frames - Maximum number of packets to receive before a RX notification.
*/
struct virtio_net_ctrl_coal_rx {
__le32 rx_max_packets;
__le32 rx_usecs;
};
#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET 1
#endif /* _UAPI_LINUX_VIRTIO_NET_H */
......@@ -202,6 +202,8 @@ struct virtio_pci_cfg_cap {
#define VIRTIO_PCI_COMMON_Q_AVAILHI 44
#define VIRTIO_PCI_COMMON_Q_USEDLO 48
#define VIRTIO_PCI_COMMON_Q_USEDHI 52
#define VIRTIO_PCI_COMMON_Q_NDATA 56
#define VIRTIO_PCI_COMMON_Q_RESET 58
#endif /* VIRTIO_PCI_NO_MODERN */
......
......@@ -29,7 +29,6 @@
#define READ 0
#define WRITE 1
typedef unsigned long long phys_addr_t;
typedef unsigned long long dma_addr_t;
typedef size_t __kernel_size_t;
typedef unsigned int __wsum;
......@@ -136,6 +135,7 @@ static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t
#endif
#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define dev_warn_once(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define min(x, y) ({ \
typeof(x) _min1 = (x); \
......
#include <limits.h>
#include "../../../include/linux/vringh.h"
......@@ -102,8 +102,8 @@ static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev)
memset(info->ring, 0, vring_size(num, 4096));
vring_init(&info->vring, num, info->ring, 4096);
info->vq = __vring_new_virtqueue(info->idx, info->vring, vdev, true,
false, vq_notify, vq_callback, "test");
info->vq = vring_new_virtqueue(info->idx, num, 4096, vdev, true, false,
info->ring, vq_notify, vq_callback, "test");
assert(info->vq);
info->vq->priv = info;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment