Commit 16bb86b5 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:
 "A bunch of new drivers including vdpa support for block and
  virtio-vdpa.

  Beginning of vq kick (aka doorbell) mapping support.

  Misc fixes"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (40 commits)
  virtio_pci_modern: correct sparse tags for notify
  virtio_pci_modern: __force cast the notify mapping
  vDPA/ifcvf: get_config_size should return dev specific config size
  vDPA/ifcvf: enable Intel C5000X-PL virtio-block for vDPA
  vDPA/ifcvf: deduce VIRTIO device ID when probe
  vdpa_sim_blk: add support for vdpa management tool
  vdpa_sim_blk: handle VIRTIO_BLK_T_GET_ID
  vdpa_sim_blk: implement ramdisk behaviour
  vdpa: add vdpa simulator for block device
  vhost/vdpa: Remove the restriction that only supports virtio-net devices
  vhost/vdpa: use get_config_size callback in vhost_vdpa_config_validate()
  vdpa: add get_config_size callback in vdpa_config_ops
  vdpa_sim: cleanup kiovs in vdpasim_free()
  vringh: add vringh_kiov_length() helper
  vringh: implement vringh_kiov_advance()
  vringh: explain more about cleaning riov and wiov
  vringh: reset kiov 'consumed' field in __vringh_iov()
  vringh: add 'iotlb_lock' to synchronize iotlb accesses
  vdpa_sim: use iova module to allocate IOVA addresses
  vDPA/ifcvf: deduce VIRTIO device ID from pdev ids
  ...
parents 57151b50 d7bce85a
......@@ -42,6 +42,7 @@ obj-$(CONFIG_DMADEVICES) += dma/
obj-y += soc/
obj-$(CONFIG_VIRTIO) += virtio/
obj-$(CONFIG_VIRTIO_PCI_LIB) += virtio/
obj-$(CONFIG_VDPA) += vdpa/
obj-$(CONFIG_XEN) += xen/
......
......@@ -2870,9 +2870,13 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
{
int i;
vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
if (!vi->ctrl)
goto err_ctrl;
if (vi->has_cvq) {
vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
if (!vi->ctrl)
goto err_ctrl;
} else {
vi->ctrl = NULL;
}
vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL);
if (!vi->sq)
goto err_sq;
......
......@@ -14,6 +14,7 @@ config VDPA_SIM
depends on RUNTIME_TESTING_MENU && HAS_DMA
select DMA_OPS
select VHOST_RING
select IOMMU_IOVA
help
Enable this module to support vDPA device simulators. These devices
are used for testing, prototyping and development of vDPA.
......@@ -25,6 +26,13 @@ config VDPA_SIM_NET
help
vDPA networking device simulator which loops TX traffic back to RX.
config VDPA_SIM_BLOCK
tristate "vDPA simulator for block device"
depends on VDPA_SIM
help
vDPA block device simulator which terminates IO request in a
memory buffer.
config IFCVF
tristate "Intel IFC VF vDPA driver"
depends on PCI_MSI
......@@ -52,4 +60,11 @@ config MLX5_VDPA_NET
be executed by the hardware. It also supports a variety of stateless
offloads depending on the actual device used and firmware version.
config VP_VDPA
tristate "Virtio PCI bridge vDPA driver"
select VIRTIO_PCI_LIB
depends on PCI_MSI
help
This kernel module bridges virtio PCI device to vDPA bus.
endif # VDPA
......@@ -3,3 +3,4 @@ obj-$(CONFIG_VDPA) += vdpa.o
obj-$(CONFIG_VDPA_SIM) += vdpa_sim/
obj-$(CONFIG_IFCVF) += ifcvf/
obj-$(CONFIG_MLX5_VDPA) += mlx5/
obj-$(CONFIG_VP_VDPA) += virtio_pci/
......@@ -202,10 +202,11 @@ static void ifcvf_add_status(struct ifcvf_hw *hw, u8 status)
ifcvf_get_status(hw);
}
u64 ifcvf_get_features(struct ifcvf_hw *hw)
u64 ifcvf_get_hw_features(struct ifcvf_hw *hw)
{
struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
u32 features_lo, features_hi;
u64 features;
ifc_iowrite32(0, &cfg->device_feature_select);
features_lo = ifc_ioread32(&cfg->device_feature);
......@@ -213,7 +214,26 @@ u64 ifcvf_get_features(struct ifcvf_hw *hw)
ifc_iowrite32(1, &cfg->device_feature_select);
features_hi = ifc_ioread32(&cfg->device_feature);
return ((u64)features_hi << 32) | features_lo;
features = ((u64)features_hi << 32) | features_lo;
return features;
}
/*
 * Return the device feature bits cached in hw->hw_features.
 *
 * No device register is accessed here; the value is whatever was last
 * stored in the hw_features field of @hw.
 */
u64 ifcvf_get_features(struct ifcvf_hw *hw)
{
return hw->hw_features;
}
/*
 * Check that a feature set requested by the driver satisfies the minimum
 * this device needs.
 *
 * An empty set (0) is accepted unchanged; any non-empty set must contain
 * VIRTIO_F_ACCESS_PLATFORM.
 *
 * Returns 0 when @features is acceptable, -EINVAL (after logging an error)
 * otherwise.
 */
int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features)
{
	struct ifcvf_adapter *ifcvf = vf_to_adapter(hw);

	if (!features || (features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
		return 0;

	IFCVF_ERR(ifcvf->pdev, "VIRTIO_F_ACCESS_PLATFORM is not negotiated\n");
	return -EINVAL;
}
void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset,
......
......@@ -15,15 +15,26 @@
#include <linux/pci_regs.h>
#include <linux/vdpa.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_blk.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_pci.h>
#define IFCVF_VENDOR_ID 0x1AF4
#define IFCVF_DEVICE_ID 0x1041
#define IFCVF_SUBSYS_VENDOR_ID 0x8086
#define IFCVF_SUBSYS_DEVICE_ID 0x001A
#define N3000_VENDOR_ID 0x1AF4
#define N3000_DEVICE_ID 0x1041
#define N3000_SUBSYS_VENDOR_ID 0x8086
#define N3000_SUBSYS_DEVICE_ID 0x001A
#define IFCVF_SUPPORTED_FEATURES \
#define C5000X_PL_VENDOR_ID 0x1AF4
#define C5000X_PL_DEVICE_ID 0x1000
#define C5000X_PL_SUBSYS_VENDOR_ID 0x8086
#define C5000X_PL_SUBSYS_DEVICE_ID 0x0001
#define C5000X_PL_BLK_VENDOR_ID 0x1AF4
#define C5000X_PL_BLK_DEVICE_ID 0x1001
#define C5000X_PL_BLK_SUBSYS_VENDOR_ID 0x8086
#define C5000X_PL_BLK_SUBSYS_DEVICE_ID 0x0002
#define IFCVF_NET_SUPPORTED_FEATURES \
((1ULL << VIRTIO_NET_F_MAC) | \
(1ULL << VIRTIO_F_ANY_LAYOUT) | \
(1ULL << VIRTIO_F_VERSION_1) | \
......@@ -78,6 +89,8 @@ struct ifcvf_hw {
void __iomem *notify_base;
u32 notify_off_multiplier;
u64 req_features;
u64 hw_features;
u32 dev_type;
struct virtio_pci_common_cfg __iomem *common_cfg;
void __iomem *net_cfg;
struct vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2];
......@@ -116,7 +129,10 @@ void ifcvf_set_status(struct ifcvf_hw *hw, u8 status);
void io_write64_twopart(u64 val, u32 *lo, u32 *hi);
void ifcvf_reset(struct ifcvf_hw *hw);
u64 ifcvf_get_features(struct ifcvf_hw *hw);
u64 ifcvf_get_hw_features(struct ifcvf_hw *hw);
int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features);
u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid);
int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num);
struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw);
int ifcvf_probed_virtio_net(struct ifcvf_hw *hw);
#endif /* _IFCVF_H_ */
......@@ -14,7 +14,6 @@
#include <linux/sysfs.h>
#include "ifcvf_base.h"
#define VERSION_STRING "0.1"
#define DRIVER_AUTHOR "Intel Corporation"
#define IFCVF_DRIVER_NAME "ifcvf"
......@@ -169,10 +168,23 @@ static struct ifcvf_hw *vdpa_to_vf(struct vdpa_device *vdpa_dev)
static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev)
{
struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
struct pci_dev *pdev = adapter->pdev;
u64 features;
features = ifcvf_get_features(vf) & IFCVF_SUPPORTED_FEATURES;
switch (vf->dev_type) {
case VIRTIO_ID_NET:
features = ifcvf_get_features(vf) & IFCVF_NET_SUPPORTED_FEATURES;
break;
case VIRTIO_ID_BLOCK:
features = ifcvf_get_features(vf);
break;
default:
features = 0;
IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", vf->dev_type);
}
return features;
}
......@@ -180,6 +192,11 @@ static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev)
static int ifcvf_vdpa_set_features(struct vdpa_device *vdpa_dev, u64 features)
{
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
int ret;
ret = ifcvf_verify_min_features(vf, features);
if (ret)
return ret;
vf->req_features = features;
......@@ -319,12 +336,17 @@ static u32 ifcvf_vdpa_get_generation(struct vdpa_device *vdpa_dev)
static u32 ifcvf_vdpa_get_device_id(struct vdpa_device *vdpa_dev)
{
return VIRTIO_ID_NET;
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
return vf->dev_type;
}
static u32 ifcvf_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev)
{
return IFCVF_SUBSYS_VENDOR_ID;
struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
struct pci_dev *pdev = adapter->pdev;
return pdev->subsystem_vendor;
}
static u32 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev)
......@@ -332,6 +354,28 @@ static u32 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev)
return IFCVF_QUEUE_ALIGNMENT;
}
/*
 * Report the size of the device-specific config space for @vdpa_dev.
 *
 * The size depends on the virtio device type recorded in vf->dev_type:
 * net and block devices report the size of their respective virtio
 * config structures. Any other type is logged as unsupported and
 * reported as 0.
 */
static size_t ifcvf_vdpa_get_config_size(struct vdpa_device *vdpa_dev)
{
	struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
	struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
	struct pci_dev *pdev = adapter->pdev;

	if (vf->dev_type == VIRTIO_ID_NET)
		return sizeof(struct virtio_net_config);

	if (vf->dev_type == VIRTIO_ID_BLOCK)
		return sizeof(struct virtio_blk_config);

	IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", vf->dev_type);
	return 0;
}
static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev,
unsigned int offset,
void *buf, unsigned int len)
......@@ -392,6 +436,7 @@ static const struct vdpa_config_ops ifc_vdpa_ops = {
.get_device_id = ifcvf_vdpa_get_device_id,
.get_vendor_id = ifcvf_vdpa_get_vendor_id,
.get_vq_align = ifcvf_vdpa_get_vq_align,
.get_config_size = ifcvf_vdpa_get_config_size,
.get_config = ifcvf_vdpa_get_config,
.set_config = ifcvf_vdpa_set_config,
.set_config_cb = ifcvf_vdpa_set_config_cb,
......@@ -441,6 +486,19 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pci_set_drvdata(pdev, adapter);
vf = &adapter->vf;
/* This driver drives both modern virtio devices and transitional
* devices in modern mode.
* vDPA requires feature bit VIRTIO_F_ACCESS_PLATFORM,
* so legacy devices and transitional devices in legacy
* mode will not work for vDPA, this driver will not
* drive devices with legacy interface.
*/
if (pdev->device < 0x1040)
vf->dev_type = pdev->subsystem_device;
else
vf->dev_type = pdev->device - 0x1040;
vf->base = pcim_iomap_table(pdev);
adapter->pdev = pdev;
......@@ -455,6 +513,8 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++)
vf->vring[i].irq = -EINVAL;
vf->hw_features = ifcvf_get_hw_features(vf);
ret = vdpa_register_device(&adapter->vdpa, IFCVF_MAX_QUEUE_PAIRS * 2);
if (ret) {
IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus");
......@@ -476,10 +536,19 @@ static void ifcvf_remove(struct pci_dev *pdev)
}
static struct pci_device_id ifcvf_pci_ids[] = {
{ PCI_DEVICE_SUB(IFCVF_VENDOR_ID,
IFCVF_DEVICE_ID,
IFCVF_SUBSYS_VENDOR_ID,
IFCVF_SUBSYS_DEVICE_ID) },
{ PCI_DEVICE_SUB(N3000_VENDOR_ID,
N3000_DEVICE_ID,
N3000_SUBSYS_VENDOR_ID,
N3000_SUBSYS_DEVICE_ID) },
{ PCI_DEVICE_SUB(C5000X_PL_VENDOR_ID,
C5000X_PL_DEVICE_ID,
C5000X_PL_SUBSYS_VENDOR_ID,
C5000X_PL_SUBSYS_DEVICE_ID) },
{ PCI_DEVICE_SUB(C5000X_PL_BLK_VENDOR_ID,
C5000X_PL_BLK_DEVICE_ID,
C5000X_PL_BLK_SUBSYS_VENDOR_ID,
C5000X_PL_BLK_SUBSYS_DEVICE_ID) },
{ 0 },
};
MODULE_DEVICE_TABLE(pci, ifcvf_pci_ids);
......@@ -494,4 +563,3 @@ static struct pci_driver ifcvf_driver = {
module_pci_driver(ifcvf_driver);
MODULE_LICENSE("GPL v2");
MODULE_VERSION(VERSION_STRING);
......@@ -1809,6 +1809,11 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
}
/*
 * Report the size of the device config space: always the size of
 * struct virtio_net_config, independent of @vdev.
 */
static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
{
return sizeof(struct virtio_net_config);
}
static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
unsigned int len)
{
......@@ -1895,6 +1900,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
.get_vendor_id = mlx5_vdpa_get_vendor_id,
.get_status = mlx5_vdpa_get_status,
.set_status = mlx5_vdpa_set_status,
.get_config_size = mlx5_vdpa_get_config_size,
.get_config = mlx5_vdpa_get_config,
.set_config = mlx5_vdpa_set_config,
.get_generation = mlx5_vdpa_get_generation,
......@@ -1974,23 +1980,32 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
}
}
static int mlx5v_probe(struct auxiliary_device *adev,
const struct auxiliary_device_id *id)
/* Per auxiliary-device state tying a vdpa management device to mlx5. */
struct mlx5_vdpa_mgmtdev {
/* Embedded management device; callbacks get back here via container_of(). */
struct vdpa_mgmt_dev mgtdev;
/* mlx5 auxiliary device this management device was probed for. */
struct mlx5_adev *madev;
/*
 * The vdpa net device created through dev_add, or NULL when none exists.
 * While it is non-NULL a further dev_add request is refused with -ENOSPC.
 */
struct mlx5_vdpa_net *ndev;
};
static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
{
struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
struct mlx5_core_dev *mdev = madev->mdev;
struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
struct virtio_net_config *config;
struct mlx5_vdpa_dev *mvdev;
struct mlx5_vdpa_net *ndev;
struct mlx5_core_dev *mdev;
u32 max_vqs;
int err;
if (mgtdev->ndev)
return -ENOSPC;
mdev = mgtdev->madev->mdev;
/* we save one virtqueue for control virtqueue should we require it */
max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
NULL);
name);
if (IS_ERR(ndev))
return PTR_ERR(ndev);
......@@ -2017,11 +2032,12 @@ static int mlx5v_probe(struct auxiliary_device *adev,
if (err)
goto err_res;
err = vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
mvdev->vdev.mdev = &mgtdev->mgtdev;
err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
if (err)
goto err_reg;
dev_set_drvdata(&adev->dev, ndev);
mgtdev->ndev = ndev;
return 0;
err_reg:
......@@ -2034,11 +2050,62 @@ static int mlx5v_probe(struct auxiliary_device *adev,
return err;
}
/*
 * Management-device callback that removes the vdpa device @dev.
 *
 * @v_mdev is the vdpa_mgmt_dev embedded in a struct mlx5_vdpa_mgmtdev.
 */
static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
{
struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
_vdpa_unregister_device(dev);
/* Mark the slot free so that mlx5_vdpa_dev_add() stops returning -ENOSPC. */
mgtdev->ndev = NULL;
}
static const struct vdpa_mgmtdev_ops mdev_ops = {
.dev_add = mlx5_vdpa_dev_add,
.dev_del = mlx5_vdpa_dev_del,
};
static struct virtio_device_id id_table[] = {
{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
{ 0 },
};
/*
 * Auxiliary-bus probe: allocate a management device for @adev and register
 * it with the vdpa core. No vdpa device is created here; that happens
 * through the management device's dev_add callback.
 *
 * On success the management device is stored as the driver data of @adev.
 *
 * Returns 0 on success, -ENOMEM if the allocation fails, or the error
 * returned by vdpa_mgmtdev_register().
 */
static int mlx5v_probe(struct auxiliary_device *adev,
		       const struct auxiliary_device_id *id)
{
	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_vdpa_mgmtdev *mgtdev;
	int err;

	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
	if (!mgtdev)
		return -ENOMEM;

	mgtdev->madev = madev;
	mgtdev->mgtdev.device = madev->mdev->device;
	mgtdev->mgtdev.id_table = id_table;
	mgtdev->mgtdev.ops = &mdev_ops;

	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
	if (err) {
		/* Registration failed: nothing else references mgtdev yet. */
		kfree(mgtdev);
		return err;
	}

	dev_set_drvdata(&adev->dev, mgtdev);
	return 0;
}
static void mlx5v_remove(struct auxiliary_device *adev)
{
struct mlx5_vdpa_dev *mvdev = dev_get_drvdata(&adev->dev);
struct mlx5_vdpa_mgmtdev *mgtdev;
vdpa_unregister_device(&mvdev->vdev);
mgtdev = dev_get_drvdata(&adev->dev);
vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
kfree(mgtdev);
}
static const struct auxiliary_device_id mlx5v_id_table[] = {
......
......@@ -75,8 +75,8 @@ static void vdpa_release_dev(struct device *d)
* Driver should use vdpa_alloc_device() wrapper macro instead of
* using this directly.
*
* Returns an error when parent/config/dma_dev is not set or fail to get
* ida.
* Return: Returns an error when parent/config/dma_dev is not set or fail to get
* ida.
*/
struct vdpa_device *__vdpa_alloc_device(struct device *parent,
const struct vdpa_config_ops *config,
......@@ -157,7 +157,7 @@ static int __vdpa_register_device(struct vdpa_device *vdev, int nvqs)
* @vdev: the vdpa device to be registered to vDPA bus
* @nvqs: number of virtqueues supported by this device
*
* Returns an error when fail to add device to vDPA bus
* Return: Returns an error when fail to add device to vDPA bus
*/
int _vdpa_register_device(struct vdpa_device *vdev, int nvqs)
{
......@@ -174,7 +174,7 @@ EXPORT_SYMBOL_GPL(_vdpa_register_device);
* @vdev: the vdpa device to be registered to vDPA bus
* @nvqs: number of virtqueues supported by this device
*
* Returns an error when fail to add to vDPA bus
* Return: Returns an error when fail to add to vDPA bus
*/
int vdpa_register_device(struct vdpa_device *vdev, int nvqs)
{
......@@ -218,7 +218,7 @@ EXPORT_SYMBOL_GPL(vdpa_unregister_device);
* @drv: the vdpa device driver to be registered
* @owner: module owner of the driver
*
* Returns an err when fail to do the registration
* Return: Returns an err when fail to do the registration
*/
int __vdpa_register_driver(struct vdpa_driver *drv, struct module *owner)
{
......@@ -245,6 +245,8 @@ EXPORT_SYMBOL_GPL(vdpa_unregister_driver);
* @mdev: Pointer to vdpa management device
* vdpa_mgmtdev_register() register a vdpa management device which supports
* vdpa device management.
* Return: Returns 0 on success or failure when required callback ops are not
* initialized.
*/
int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev)
{
......
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o
obj-$(CONFIG_VDPA_SIM_NET) += vdpa_sim_net.o
obj-$(CONFIG_VDPA_SIM_BLOCK) += vdpa_sim_blk.o
......@@ -17,6 +17,7 @@
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <linux/vhost_iotlb.h>
#include <linux/iova.h>
#include "vdpa_sim.h"
......@@ -128,30 +129,57 @@ static int dir_to_perm(enum dma_data_direction dir)
return perm;
}
/*
 * Allocate an IOVA range of @size bytes and map it to @paddr in the
 * simulator IOTLB with permission @perm.
 *
 * Returns the DMA address of the new mapping, or DMA_MAPPING_ERROR if
 * either the IOVA allocation or the IOTLB insertion fails (the IOVA range
 * is released again in the latter case).
 */
static dma_addr_t vdpasim_map_range(struct vdpasim *vdpasim, phys_addr_t paddr,
				    size_t size, unsigned int perm)
{
	struct iova *range;
	dma_addr_t start;
	int err;

	/* limit_pfn is set to the maximum (ULONG_MAX - 1) */
	range = alloc_iova(&vdpasim->iova, size, ULONG_MAX - 1, true);
	if (!range)
		return DMA_MAPPING_ERROR;

	start = iova_dma_addr(&vdpasim->iova, range);

	spin_lock(&vdpasim->iommu_lock);
	err = vhost_iotlb_add_range(vdpasim->iommu, (u64)start,
				    (u64)start + size - 1, (u64)paddr, perm);
	spin_unlock(&vdpasim->iommu_lock);

	if (!err)
		return start;

	__free_iova(&vdpasim->iova, range);
	return DMA_MAPPING_ERROR;
}
/*
 * Tear down a mapping of @size bytes at @dma_addr: remove the
 * [dma_addr, dma_addr + size - 1] range from the simulator IOTLB, then
 * release the IOVA that contains @dma_addr.
 */
static void vdpasim_unmap_range(struct vdpasim *vdpasim, dma_addr_t dma_addr,
size_t size)
{
/* The IOTLB is only touched with iommu_lock held. */
spin_lock(&vdpasim->iommu_lock);
vhost_iotlb_del_range(vdpasim->iommu, (u64)dma_addr,
(u64)dma_addr + size - 1);
spin_unlock(&vdpasim->iommu_lock);
free_iova(&vdpasim->iova, iova_pfn(&vdpasim->iova, dma_addr));
}
static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
unsigned long attrs)
{
struct vdpasim *vdpasim = dev_to_sim(dev);
struct vhost_iotlb *iommu = vdpasim->iommu;
u64 pa = (page_to_pfn(page) << PAGE_SHIFT) + offset;
int ret, perm = dir_to_perm(dir);
phys_addr_t paddr = page_to_phys(page) + offset;
int perm = dir_to_perm(dir);
if (perm < 0)
return DMA_MAPPING_ERROR;
/* For simplicity, use identical mapping to avoid e.g iova
* allocator.
*/
spin_lock(&vdpasim->iommu_lock);
ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1,
pa, dir_to_perm(dir));
spin_unlock(&vdpasim->iommu_lock);
if (ret)
return DMA_MAPPING_ERROR;
return (dma_addr_t)(pa);
return vdpasim_map_range(vdpasim, paddr, size, perm);
}
static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr,
......@@ -159,12 +187,8 @@ static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr,
unsigned long attrs)
{
struct vdpasim *vdpasim = dev_to_sim(dev);
struct vhost_iotlb *iommu = vdpasim->iommu;
spin_lock(&vdpasim->iommu_lock);
vhost_iotlb_del_range(iommu, (u64)dma_addr,
(u64)dma_addr + size - 1);
spin_unlock(&vdpasim->iommu_lock);
vdpasim_unmap_range(vdpasim, dma_addr, size);
}
static void *vdpasim_alloc_coherent(struct device *dev, size_t size,
......@@ -172,27 +196,22 @@ static void *vdpasim_alloc_coherent(struct device *dev, size_t size,
unsigned long attrs)
{
struct vdpasim *vdpasim = dev_to_sim(dev);
struct vhost_iotlb *iommu = vdpasim->iommu;
void *addr = kmalloc(size, flag);
int ret;
phys_addr_t paddr;
void *addr;
spin_lock(&vdpasim->iommu_lock);
addr = kmalloc(size, flag);
if (!addr) {
*dma_addr = DMA_MAPPING_ERROR;
} else {
u64 pa = virt_to_phys(addr);
ret = vhost_iotlb_add_range(iommu, (u64)pa,
(u64)pa + size - 1,
pa, VHOST_MAP_RW);
if (ret) {
*dma_addr = DMA_MAPPING_ERROR;
kfree(addr);
addr = NULL;
} else
*dma_addr = (dma_addr_t)pa;
return NULL;
}
paddr = virt_to_phys(addr);
*dma_addr = vdpasim_map_range(vdpasim, paddr, size, VHOST_MAP_RW);
if (*dma_addr == DMA_MAPPING_ERROR) {
kfree(addr);
return NULL;
}
spin_unlock(&vdpasim->iommu_lock);
return addr;
}
......@@ -202,14 +221,10 @@ static void vdpasim_free_coherent(struct device *dev, size_t size,
unsigned long attrs)
{
struct vdpasim *vdpasim = dev_to_sim(dev);
struct vhost_iotlb *iommu = vdpasim->iommu;
spin_lock(&vdpasim->iommu_lock);
vhost_iotlb_del_range(iommu, (u64)dma_addr,
(u64)dma_addr + size - 1);
spin_unlock(&vdpasim->iommu_lock);
vdpasim_unmap_range(vdpasim, dma_addr, size);
kfree(phys_to_virt((uintptr_t)dma_addr));
kfree(vaddr);
}
static const struct dma_map_ops vdpasim_dma_ops = {
......@@ -269,7 +284,15 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
goto err_iommu;
for (i = 0; i < dev_attr->nvqs; i++)
vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu);
vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu,
&vdpasim->iommu_lock);
ret = iova_cache_get();
if (ret)
goto err_iommu;
/* For simplicity we use an IOVA allocator with byte granularity */
init_iova_domain(&vdpasim->iova, 1, 0);
vdpasim->vdpa.dma_dev = dev;
......@@ -439,6 +462,13 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
spin_unlock(&vdpasim->lock);
}
static size_t vdpasim_get_config_size(struct vdpa_device *vdpa)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
return vdpasim->dev_attr.config_size;
}
static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset,
void *buf, unsigned int len)
{
......@@ -539,8 +569,17 @@ static int vdpasim_dma_unmap(struct vdpa_device *vdpa, u64 iova, u64 size)
static void vdpasim_free(struct vdpa_device *vdpa)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
int i;
cancel_work_sync(&vdpasim->work);
for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
vringh_kiov_cleanup(&vdpasim->vqs[i].out_iov);
vringh_kiov_cleanup(&vdpasim->vqs[i].in_iov);
}
put_iova_domain(&vdpasim->iova);
iova_cache_put();
kvfree(vdpasim->buffer);
if (vdpasim->iommu)
vhost_iotlb_free(vdpasim->iommu);
......@@ -566,6 +605,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
.get_vendor_id = vdpasim_get_vendor_id,
.get_status = vdpasim_get_status,
.set_status = vdpasim_set_status,
.get_config_size = vdpasim_get_config_size,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
.get_generation = vdpasim_get_generation,
......@@ -593,6 +633,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
.get_vendor_id = vdpasim_get_vendor_id,
.get_status = vdpasim_get_status,
.set_status = vdpasim_set_status,
.get_config_size = vdpasim_get_config_size,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
.get_generation = vdpasim_get_generation,
......
......@@ -6,6 +6,7 @@
#ifndef _VDPA_SIM_H
#define _VDPA_SIM_H
#include <linux/iova.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <linux/virtio_byteorder.h>
......@@ -57,6 +58,7 @@ struct vdpasim {
/* virtio config according to device type */
void *config;
struct vhost_iotlb *iommu;
struct iova_domain iova;
void *buffer;
u32 status;
u32 generation;
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* VDPA simulator for block device.
*
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2021, Red Hat Inc. All rights reserved.
*
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/blkdev.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <linux/blkdev.h>
#include <uapi/linux/virtio_blk.h>
#include "vdpa_sim.h"
#define DRV_VERSION "0.1"
#define DRV_AUTHOR "Max Gurtovoy <mgurtovoy@nvidia.com>"
#define DRV_DESC "vDPA Device Simulator for block device"
#define DRV_LICENSE "GPL v2"
#define VDPASIM_BLK_FEATURES (VDPASIM_FEATURES | \
(1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
(1ULL << VIRTIO_BLK_F_SEG_MAX) | \
(1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
(1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
(1ULL << VIRTIO_BLK_F_MQ))
#define VDPASIM_BLK_CAPACITY 0x40000
#define VDPASIM_BLK_SIZE_MAX 0x1000
#define VDPASIM_BLK_SEG_MAX 32
#define VDPASIM_BLK_VQ_NUM 1
static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";
/*
 * Check that a request of @range_size bytes starting at @start_sector
 * respects the per-request size limit and stays inside the simulated disk.
 *
 * The capacity comparison is done in bytes rather than in whole sectors:
 * shifting @range_size down to sectors would drop a trailing partial
 * sector, letting a request that is not sector-aligned run up to
 * SECTOR_SIZE - 1 bytes past the end of the disk buffer.
 *
 * Returns true when the whole range is valid, false otherwise.
 */
static bool vdpasim_blk_check_range(u64 start_sector, size_t range_size)
{
	u64 avail_bytes;

	if (range_size > VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)
		return false;

	if (start_sector > VDPASIM_BLK_CAPACITY)
		return false;

	/* Cannot overflow: at most VDPASIM_BLK_CAPACITY << SECTOR_SHIFT. */
	avail_bytes = (u64)(VDPASIM_BLK_CAPACITY - start_sector) << SECTOR_SHIFT;
	if (range_size > avail_bytes)
		return false;

	return true;
}
/* Fetch one request from @vq and process it against the ramdisk buffer.
 *
 * Returns 'true' if the request is handled (with or without an I/O error)
 * and the status is correctly written in the last byte of the 'in iov',
 * 'false' otherwise (no descriptor available, malformed request, or the
 * status byte could not be written).
 */
static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
				   struct vdpasim_virtqueue *vq)
{
	size_t pushed = 0, to_pull, to_push;
	struct virtio_blk_outhdr hdr;
	ssize_t bytes;
	loff_t offset;
	u64 sector;
	u8 status;
	u32 type;
	int ret;

	ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov,
				   &vq->head, GFP_ATOMIC);
	if (ret != 1)
		return false;

	if (vq->out_iov.used < 1 || vq->in_iov.used < 1) {
		dev_err(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n",
			vq->out_iov.used, vq->in_iov.used);
		return false;
	}

	if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) {
		dev_err(&vdpasim->vdpa.dev, "request in header too short\n");
		return false;
	}

	/* The last byte is the status and we checked if the last iov has
	 * enough room for it.
	 */
	to_push = vringh_kiov_length(&vq->in_iov) - 1;

	to_pull = vringh_kiov_length(&vq->out_iov);

	bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr,
				      sizeof(hdr));
	if (bytes != sizeof(hdr)) {
		dev_err(&vdpasim->vdpa.dev, "request out header too short\n");
		return false;
	}

	to_pull -= bytes;

	type = vdpasim32_to_cpu(vdpasim, hdr.type);
	sector = vdpasim64_to_cpu(vdpasim, hdr.sector);
	offset = sector << SECTOR_SHIFT;
	status = VIRTIO_BLK_S_OK;

	switch (type) {
	case VIRTIO_BLK_T_IN:
		if (!vdpasim_blk_check_range(sector, to_push)) {
			dev_err(&vdpasim->vdpa.dev,
				"reading over the capacity - offset: 0x%llx len: 0x%zx\n",
				offset, to_push);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
					      vdpasim->buffer + offset,
					      to_push);
		if (bytes < 0) {
			dev_err(&vdpasim->vdpa.dev,
				"vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_push);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		pushed += bytes;
		break;

	case VIRTIO_BLK_T_OUT:
		if (!vdpasim_blk_check_range(sector, to_pull)) {
			dev_err(&vdpasim->vdpa.dev,
				"writing over the capacity - offset: 0x%llx len: 0x%zx\n",
				offset, to_pull);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov,
					      vdpasim->buffer + offset,
					      to_pull);
		if (bytes < 0) {
			dev_err(&vdpasim->vdpa.dev,
				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_pull);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}
		break;

	case VIRTIO_BLK_T_GET_ID:
		/* Clamp to the data area so that a short 'in' buffer cannot
		 * make the ID consume the byte reserved for the status.
		 */
		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
					      vdpasim_blk_id,
					      min_t(size_t, to_push,
						    VIRTIO_BLK_ID_BYTES));
		if (bytes < 0) {
			dev_err(&vdpasim->vdpa.dev,
				"vringh_iov_push_iotlb() error: %zd\n", bytes);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		pushed += bytes;
		break;

	default:
		dev_warn(&vdpasim->vdpa.dev,
			 "Unsupported request type %d\n", type);
		status = VIRTIO_BLK_S_IOERR;
		break;
	}

	/* If some operations fail, we need to skip the remaining bytes
	 * to put the status in the last byte. Compare instead of subtracting
	 * so that the unsigned difference can never wrap around.
	 */
	if (pushed < to_push)
		vringh_kiov_advance(&vq->in_iov, to_push - pushed);

	/* Last byte is the status */
	bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1);
	if (bytes != 1)
		return false;

	pushed += bytes;

	/* Make sure data is written before advancing index */
	smp_wmb();

	vringh_complete_iotlb(&vq->vring, vq->head, pushed);

	return true;
}
/*
 * Work handler of the block simulator: with the simulator lock held, and
 * only once the driver has set DRIVER_OK, drain every ready virtqueue by
 * handling requests until vdpasim_blk_handle_req() reports there is
 * nothing more it can process, notifying the driver after each request
 * when needed.
 */
static void vdpasim_blk_work(struct work_struct *work)
{
	struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
	int qidx;

	spin_lock(&vdpasim->lock);

	if (vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK) {
		for (qidx = 0; qidx < VDPASIM_BLK_VQ_NUM; qidx++) {
			struct vdpasim_virtqueue *vq = &vdpasim->vqs[qidx];

			if (vq->ready) {
				while (vdpasim_blk_handle_req(vdpasim, vq)) {
					/* Make sure used is visible before
					 * raising the interrupt.
					 */
					smp_wmb();

					local_bh_disable();
					if (vringh_need_notify_iotlb(&vq->vring) > 0)
						vringh_notify(&vq->vring);
					local_bh_enable();
				}
			}
		}
	}

	spin_unlock(&vdpasim->lock);
}
/*
 * Fill @config, interpreted as a struct virtio_blk_config, with the fixed
 * parameters of the simulated disk. The structure is zeroed first, so
 * every field not listed below reads as 0. Values are converted to the
 * device byte order with the cpu_to_vdpasim*() helpers.
 */
static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
{
	struct virtio_blk_config *cfg = config;

	memset(cfg, 0, sizeof(*cfg));

	/* Disk geometry and request limits */
	cfg->capacity = cpu_to_vdpasim64(vdpasim, VDPASIM_BLK_CAPACITY);
	cfg->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
	cfg->size_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SIZE_MAX);
	cfg->seg_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SEG_MAX);

	/* Topology hints */
	cfg->min_io_size = cpu_to_vdpasim16(vdpasim, 1);
	cfg->opt_io_size = cpu_to_vdpasim32(vdpasim, 1);

	/* Multiqueue */
	cfg->num_queues = cpu_to_vdpasim16(vdpasim, VDPASIM_BLK_VQ_NUM);
}
/*
 * Release callback for the management parent device. Intentionally empty:
 * this function performs no cleanup of its own.
 */
static void vdpasim_blk_mgmtdev_release(struct device *dev)
{
}
static struct device vdpasim_blk_mgmtdev = {
.init_name = "vdpasim_blk",
.release = vdpasim_blk_mgmtdev_release,
};
static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
{
struct vdpasim_dev_attr dev_attr = {};
struct vdpasim *simdev;
int ret;
dev_attr.mgmt_dev = mdev;
dev_attr.name = name;
dev_attr.id = VIRTIO_ID_BLOCK;
dev_attr.supported_features = VDPASIM_BLK_FEATURES;
dev_attr.nvqs = VDPASIM_BLK_VQ_NUM;
dev_attr.config_size = sizeof(struct virtio_blk_config);
dev_attr.get_config = vdpasim_blk_get_config;
dev_attr.work_fn = vdpasim_blk_work;
dev_attr.buffer_size = VDPASIM_BLK_CAPACITY << SECTOR_SHIFT;
simdev = vdpasim_create(&dev_attr);
if (IS_ERR(simdev))
return PTR_ERR(simdev);
ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM);
if (ret)
goto put_dev;
return 0;
put_dev:
put_device(&simdev->vdpa.dev);
return ret;
}
/*
 * Management-device callback: unregister the simulated block device @dev
 * from the vdpa bus.
 */
static void vdpasim_blk_dev_del(struct vdpa_mgmt_dev *mdev,
				struct vdpa_device *dev)
{
	/* @dev is the vdpa member embedded in the simulator instance. */
	_vdpa_unregister_device(dev);
}
static const struct vdpa_mgmtdev_ops vdpasim_blk_mgmtdev_ops = {
.dev_add = vdpasim_blk_dev_add,
.dev_del = vdpasim_blk_dev_del
};
static struct virtio_device_id id_table[] = {
{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
{ 0 },
};
static struct vdpa_mgmt_dev mgmt_dev = {
.device = &vdpasim_blk_mgmtdev,
.id_table = id_table,
.ops = &vdpasim_blk_mgmtdev_ops,
};
/*
 * Module init: register the parent device of the block simulator, then
 * register the management device through which vdpa block devices can be
 * added and removed.
 *
 * Returns 0 on success or the error from whichever registration failed.
 */
static int __init vdpasim_blk_init(void)
{
	int ret;

	ret = device_register(&vdpasim_blk_mgmtdev);
	if (ret) {
		/* device_register() initialises a reference even when it
		 * fails; it must be dropped with put_device().
		 */
		put_device(&vdpasim_blk_mgmtdev);
		return ret;
	}

	ret = vdpa_mgmtdev_register(&mgmt_dev);
	if (ret)
		goto parent_err;

	return 0;

parent_err:
	device_unregister(&vdpasim_blk_mgmtdev);
	return ret;
}
/*
 * Module exit: undo vdpasim_blk_init() in reverse order, unregistering the
 * management device before its parent device.
 */
static void __exit vdpasim_blk_exit(void)
{
vdpa_mgmtdev_unregister(&mgmt_dev);
device_unregister(&vdpasim_blk_mgmtdev);
}
module_init(vdpasim_blk_init)
module_exit(vdpasim_blk_exit)
MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_VP_VDPA) += vp_vdpa.o
This diff is collapsed.
......@@ -16,12 +16,12 @@
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>
#include <linux/virtio_net.h>
#include "vhost.h"
......@@ -188,13 +188,8 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
struct vhost_vdpa_config *c)
{
long size = 0;
switch (v->virtio_id) {
case VIRTIO_ID_NET:
size = sizeof(struct virtio_net_config);
break;
}
struct vdpa_device *vdpa = v->vdpa;
long size = vdpa->config->get_config_size(vdpa);
if (c->len == 0)
return -EINVAL;
......@@ -989,6 +984,7 @@ static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
if (vma->vm_end - vma->vm_start != notify.size)
return -ENOTSUPP;
vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
vma->vm_ops = &vhost_vdpa_vm_ops;
return 0;
}
......@@ -1023,10 +1019,6 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
int minor;
int r;
/* Currently, we only accept the network devices. */
if (ops->get_device_id(vdpa) != VIRTIO_ID_NET)
return -ENOTSUPP;
v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!v)
return -ENOMEM;
......
......@@ -75,6 +75,34 @@ static inline int __vringh_get_head(const struct vringh *vrh,
return head;
}
/**
 * vringh_kiov_advance - skip bytes from vringh_kiov
 * @iov: an iov passed to vringh_getdesc_*() (updated as we consume)
 * @len: the maximum length to advance
 *
 * Moves the cursor of @iov forward by at most @len bytes and stops early
 * once all used elements have been passed. When an element is fully
 * consumed, its iov_base/iov_len are rewound by @iov->consumed before the
 * cursor steps to the next element.
 */
void vringh_kiov_advance(struct vringh_kiov *iov, size_t len)
{
	size_t step;

	for (; len && iov->i < iov->used; len -= step) {
		step = min(iov->iov[iov->i].iov_len, len);

		iov->iov[iov->i].iov_base += step;
		iov->iov[iov->i].iov_len -= step;
		iov->consumed += step;

		/* Bytes are still left in this element: stay on it. */
		if (iov->iov[iov->i].iov_len)
			continue;

		/* Element exhausted: fix it up, then move to the next one. */
		iov->iov[iov->i].iov_base -= iov->consumed;
		iov->iov[iov->i].iov_len = iov->consumed;
		iov->consumed = 0;
		iov->i++;
	}
}
EXPORT_SYMBOL(vringh_kiov_advance);
/* Copy some bytes to/from the iovec. Returns num copied. */
static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
struct vringh_kiov *iov,
......@@ -95,19 +123,8 @@ static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
done += partlen;
len -= partlen;
ptr += partlen;
iov->consumed += partlen;
iov->iov[iov->i].iov_len -= partlen;
iov->iov[iov->i].iov_base += partlen;
if (!iov->iov[iov->i].iov_len) {
/* Fix up old iov element then increment. */
iov->iov[iov->i].iov_len = iov->consumed;
iov->iov[iov->i].iov_base -= iov->consumed;
iov->consumed = 0;
iov->i++;
}
vringh_kiov_advance(iov, partlen);
}
return done;
}
......@@ -290,9 +307,9 @@ __vringh_iov(struct vringh *vrh, u16 i,
return -EINVAL;
if (riov)
riov->i = riov->used = 0;
riov->i = riov->used = riov->consumed = 0;
if (wiov)
wiov->i = wiov->used = 0;
wiov->i = wiov->used = wiov->consumed = 0;
for (;;) {
void *addr;
......@@ -662,7 +679,10 @@ EXPORT_SYMBOL(vringh_init_user);
* *head will be vrh->vring.num. You may be able to ignore an invalid
* descriptor, but there's not much you can do with an invalid ring.
*
* Note that you may need to clean up riov and wiov, even on error!
* Note that you can reuse riov and wiov with subsequent calls. Content is
* overwritten and memory reallocated if more space is needed.
* When you no longer need riov and wiov, you should clean them up by
* calling vringh_iov_cleanup() to release the memory, even on error!
*/
int vringh_getdesc_user(struct vringh *vrh,
struct vringh_iov *riov,
......@@ -932,7 +952,10 @@ EXPORT_SYMBOL(vringh_init_kern);
* *head will be vrh->vring.num. You may be able to ignore an invalid
* descriptor, but there's not much you can do with an invalid ring.
*
* Note that you may need to clean up riov and wiov, even on error!
* Note that you can reuse riov and wiov with subsequent calls. Content is
* overwritten and memory reallocated if more space is needed.
* When you no longer need riov and wiov, you should clean them up by
* calling vringh_kiov_cleanup() to release the memory, even on error!
*/
int vringh_getdesc_kern(struct vringh *vrh,
struct vringh_kiov *riov,
......@@ -1074,6 +1097,8 @@ static int iotlb_translate(const struct vringh *vrh,
int ret = 0;
u64 s = 0;
spin_lock(vrh->iotlb_lock);
while (len > s) {
u64 size, pa, pfn;
......@@ -1103,6 +1128,8 @@ static int iotlb_translate(const struct vringh *vrh,
++ret;
}
spin_unlock(vrh->iotlb_lock);
return ret;
}
......@@ -1262,10 +1289,13 @@ EXPORT_SYMBOL(vringh_init_iotlb);
* vringh_set_iotlb - initialize a vringh for a ring with IOTLB.
* @vrh: the vring
* @iotlb: iotlb associated with this vring
* @iotlb_lock: spinlock to synchronize the iotlb accesses
*/
void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb)
void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
		      spinlock_t *iotlb_lock)
{
	/*
	 * Plain setter: remember the lock together with the IOTLB whose
	 * accesses it is meant to synchronize. Neither pointer is checked
	 * or dereferenced here.
	 */
	vrh->iotlb_lock = iotlb_lock;
	vrh->iotlb = iotlb;
}
EXPORT_SYMBOL(vringh_set_iotlb);
......@@ -1285,7 +1315,10 @@ EXPORT_SYMBOL(vringh_set_iotlb);
* *head will be vrh->vring.num. You may be able to ignore an invalid
* descriptor, but there's not much you can do with an invalid ring.
*
* Note that you may need to clean up riov and wiov, even on error!
* Note that you can reuse riov and wiov with subsequent calls. Content is
* overwritten and memory reallocated if more space is needed.
* When you no longer need riov and wiov, you should clean them up by
* calling vringh_kiov_cleanup() to release the memory, even on error!
*/
int vringh_getdesc_iotlb(struct vringh *vrh,
struct vringh_kiov *riov,
......
......@@ -734,7 +734,7 @@ static void report_free_page_func(struct work_struct *work)
#ifdef CONFIG_BALLOON_COMPACTION
/*
* virtballoon_migratepage - perform the balloon page migration on behalf of
* a compation thread. (called under page lock)
* a compaction thread. (called under page lock)
* @vb_dev_info: the balloon device
* @newpage: page that will replace the isolated page after migration finishes.
* @page : the isolated (old) page that is about to be migrated to newpage.
......
......@@ -192,7 +192,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
struct virtqueue *vq;
u16 num, off;
u16 num;
int err;
if (index >= vp_modern_get_num_queues(mdev))
......@@ -208,9 +208,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
return ERR_PTR(-EINVAL);
}
/* get offset of notification word for this vq */
off = vp_modern_get_queue_notify_off(mdev, index);
info->msix_vector = msix_vec;
/* create the vring */
......@@ -227,27 +224,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
virtqueue_get_avail_addr(vq),
virtqueue_get_used_addr(vq));
if (mdev->notify_base) {
/* offset should not wrap */
if ((u64)off * mdev->notify_offset_multiplier + 2
> mdev->notify_len) {
dev_warn(&mdev->pci_dev->dev,
"bad notification offset %u (x %u) "
"for queue %u > %zd",
off, mdev->notify_offset_multiplier,
index, mdev->notify_len);
err = -EINVAL;
goto err_map_notify;
}
vq->priv = (void __force *)mdev->notify_base +
off * mdev->notify_offset_multiplier;
} else {
vq->priv = (void __force *)vp_modern_map_capability(mdev,
mdev->notify_map_cap, 2, 2,
off * mdev->notify_offset_multiplier, 2,
NULL);
}
vq->priv = (void __force *)vp_modern_map_vq_notify(mdev, index, NULL);
if (!vq->priv) {
err = -ENOMEM;
goto err_map_notify;
......
......@@ -13,14 +13,14 @@
* @start: start from the capability
* @size: map size
* @len: the length that is actually mapped
* @pa: physical address of the capability
*
* Returns the io address for the mapped part of the capability
*/
void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off,
size_t minlen,
u32 align,
u32 start, u32 size,
size_t *len)
static void __iomem *
vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off,
size_t minlen, u32 align, u32 start, u32 size,
size_t *len, resource_size_t *pa)
{
struct pci_dev *dev = mdev->pci_dev;
u8 bar;
......@@ -88,9 +88,11 @@ void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, in
dev_err(&dev->dev,
"virtio_pci: unable to map virtio %u@%u on bar %i\n",
length, offset, bar);
else if (pa)
*pa = pci_resource_start(dev, bar) + offset;
return p;
}
EXPORT_SYMBOL_GPL(vp_modern_map_capability);
/**
* virtio_pci_find_capability - walk capabilities to find device info.
......@@ -275,12 +277,12 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev)
mdev->common = vp_modern_map_capability(mdev, common,
sizeof(struct virtio_pci_common_cfg), 4,
0, sizeof(struct virtio_pci_common_cfg),
NULL);
NULL, NULL);
if (!mdev->common)
goto err_map_common;
mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1,
0, 1,
NULL);
NULL, NULL);
if (!mdev->isr)
goto err_map_isr;
......@@ -308,7 +310,8 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev)
mdev->notify_base = vp_modern_map_capability(mdev, notify,
2, 2,
0, notify_length,
&mdev->notify_len);
&mdev->notify_len,
&mdev->notify_pa);
if (!mdev->notify_base)
goto err_map_notify;
} else {
......@@ -321,7 +324,8 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev)
if (device) {
mdev->device = vp_modern_map_capability(mdev, device, 0, 4,
0, PAGE_SIZE,
&mdev->device_len);
&mdev->device_len,
NULL);
if (!mdev->device)
goto err_map_device;
}
......@@ -584,14 +588,51 @@ EXPORT_SYMBOL_GPL(vp_modern_get_num_queues);
*
* Returns the notification offset for a virtqueue
*/
u16 vp_modern_get_queue_notify_off(struct virtio_pci_modern_device *mdev,
u16 index)
static u16 vp_modern_get_queue_notify_off(struct virtio_pci_modern_device *mdev,
u16 index)
{
/*
 * Write the queue index to queue_select first, then read
 * queue_notify_off back. The read is expected to reflect the queue
 * selected by the preceding write, so the order must be kept
 * (NOTE(review): per the virtio-pci common config layout -- confirm).
 */
vp_iowrite16(index, &mdev->common->queue_select);
return vp_ioread16(&mdev->common->queue_notify_off);
}
EXPORT_SYMBOL_GPL(vp_modern_get_queue_notify_off);
/**
 * vp_modern_map_vq_notify - map notification area for a
 * specific virtqueue
 * @mdev: the modern virtio-pci device
 * @index: the queue index
 * @pa: the pointer to the physical address of the notify area
 *      (may be NULL when the caller does not need it)
 *
 * Returns the address of the notification area. Returns NULL when the
 * queue's notification offset does not fit inside the pre-mapped notify
 * region; when there is no pre-mapped region, the result of
 * vp_modern_map_capability() is returned as is.
 */
void __iomem *vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev,
				      u16 index, resource_size_t *pa)
{
	u16 off = vp_modern_get_queue_notify_off(mdev, index);

	/* No pre-mapped notify region: map just this queue's 2 bytes. */
	if (!mdev->notify_base)
		return vp_modern_map_capability(mdev,
				mdev->notify_map_cap, 2, 2,
				off * mdev->notify_offset_multiplier, 2,
				NULL, pa);

	/* offset should not wrap */
	if ((u64)off * mdev->notify_offset_multiplier + 2
		> mdev->notify_len) {
		/* notify_len is a size_t, hence %zu rather than %zd. */
		dev_warn(&mdev->pci_dev->dev,
			 "bad notification offset %u (x %u) for queue %u > %zu",
			 off, mdev->notify_offset_multiplier,
			 index, mdev->notify_len);
		return NULL;
	}

	if (pa)
		*pa = mdev->notify_pa +
		      off * mdev->notify_offset_multiplier;

	return mdev->notify_base + off * mdev->notify_offset_multiplier;
}
EXPORT_SYMBOL_GPL(vp_modern_map_vq_notify);
MODULE_VERSION("0.1");
MODULE_DESCRIPTION("Modern Virtio PCI Device");
......
......@@ -8,7 +8,7 @@
#include <linux/vhost_iotlb.h>
/**
* vDPA callback definition.
* struct vdpa_callback - vDPA callback definition.
* @callback: interrupt callback function
* @private: the data passed to the callback function
*/
......@@ -18,7 +18,7 @@ struct vdpa_callback {
};
/**
* vDPA notification area
* struct vdpa_notification_area - vDPA notification area
* @addr: base address of the notification area
* @size: size of the notification area
*/
......@@ -28,7 +28,7 @@ struct vdpa_notification_area {
};
/**
* vDPA vq_state definition
* struct vdpa_vq_state - vDPA vq_state definition
* @avail_index: available index
*/
struct vdpa_vq_state {
......@@ -38,7 +38,7 @@ struct vdpa_vq_state {
struct vdpa_mgmt_dev;
/**
* vDPA device - representation of a vDPA device
* struct vdpa_device - representation of a vDPA device
* @dev: underlying device
* @dma_dev: the actual device that is performing DMA
* @config: the configuration ops for this device.
......@@ -59,7 +59,7 @@ struct vdpa_device {
};
/**
* vDPA IOVA range - the IOVA range support by the device
* struct vdpa_iova_range - the IOVA range supported by the device
* @first: start of the IOVA range
* @last: end of the IOVA range
*/
......@@ -69,7 +69,7 @@ struct vdpa_iova_range {
};
/**
* vDPA_config_ops - operations for configuring a vDPA device.
* struct vdpa_config_ops - operations for configuring a vDPA device.
* Note: vDPA device drivers are required to implement all of the
* operations unless it is mentioned to be optional in the following
* list.
......@@ -150,6 +150,9 @@ struct vdpa_iova_range {
* @set_status: Set the device status
* @vdev: vdpa device
* @status: virtio device status
* @get_config_size: Get the size of the configuration space
* @vdev: vdpa device
* Returns size_t: configuration size
* @get_config: Read from device specific configuration space
* @vdev: vdpa device
* @offset: offset from the beginning of
......@@ -231,6 +234,7 @@ struct vdpa_config_ops {
u32 (*get_vendor_id)(struct vdpa_device *vdev);
u8 (*get_status)(struct vdpa_device *vdev);
void (*set_status)(struct vdpa_device *vdev, u8 status);
size_t (*get_config_size)(struct vdpa_device *vdev);
void (*get_config)(struct vdpa_device *vdev, unsigned int offset,
void *buf, unsigned int len);
void (*set_config)(struct vdpa_device *vdev, unsigned int offset,
......@@ -267,7 +271,7 @@ int _vdpa_register_device(struct vdpa_device *vdev, int nvqs);
void _vdpa_unregister_device(struct vdpa_device *vdev);
/**
* vdpa_driver - operations for a vDPA driver
* struct vdpa_driver - operations for a vDPA driver
* @driver: underlying device driver
* @probe: the function to call when a device is found. Returns 0 or -errno.
* @remove: the function to call when a device is removed.
......@@ -344,18 +348,18 @@ static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset,
}
/**
* vdpa_mgmtdev_ops - vdpa device ops
* @dev_add: Add a vdpa device using alloc and register
* @mdev: parent device to use for device addition
* @name: name of the new vdpa device
* Driver need to add a new device using _vdpa_register_device()
* after fully initializing the vdpa device. Driver must return 0
* on success or appropriate error code.
* @dev_del: Remove a vdpa device using unregister
* @mdev: parent device to use for device removal
* @dev: vdpa device to remove
* Driver need to remove the specified device by calling
* _vdpa_unregister_device().
* struct vdpa_mgmtdev_ops - vdpa device ops
* @dev_add: Add a vdpa device using alloc and register
* @mdev: parent device to use for device addition
* @name: name of the new vdpa device
* Driver needs to add a new device using _vdpa_register_device()
* after fully initializing the vdpa device. Driver must return 0
* on success or appropriate error code.
* @dev_del: Remove a vdpa device using unregister
* @mdev: parent device to use for device removal
* @dev: vdpa device to remove
* Driver needs to remove the specified device by calling
* _vdpa_unregister_device().
*/
struct vdpa_mgmtdev_ops {
int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name);
......
......@@ -13,6 +13,8 @@ struct virtio_pci_modern_device {
void __iomem *device;
/* Base of vq notifications (non-legacy mode). */
void __iomem *notify_base;
/* Physical base of vq notifications */
resource_size_t notify_pa;
/* Where to read and clear interrupt */
u8 __iomem *isr;
......@@ -99,13 +101,8 @@ void vp_modern_set_queue_size(struct virtio_pci_modern_device *mdev,
u16 vp_modern_get_queue_size(struct virtio_pci_modern_device *mdev,
u16 idx);
u16 vp_modern_get_num_queues(struct virtio_pci_modern_device *mdev);
u16 vp_modern_get_queue_notify_off(struct virtio_pci_modern_device *mdev,
u16 idx);
void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off,
size_t minlen,
u32 align,
u32 start, u32 size,
size_t *len);
void __iomem * vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev,
u16 index, resource_size_t *pa);
int vp_modern_probe(struct virtio_pci_modern_device *mdev);
void vp_modern_remove(struct virtio_pci_modern_device *mdev);
#endif
......@@ -46,6 +46,9 @@ struct vringh {
/* IOTLB for this vring */
struct vhost_iotlb *iotlb;
/* spinlock to synchronize IOTLB accesses */
spinlock_t *iotlb_lock;
/* The function to call to notify the guest about added buffers */
void (*notify)(struct vringh *);
};
......@@ -196,6 +199,19 @@ static inline void vringh_kiov_cleanup(struct vringh_kiov *kiov)
kiov->iov = NULL;
}
/*
 * vringh_kiov_length - sum the iov_len of every element from the current
 * position (kiov->i) up to, but not including, kiov->used.
 */
static inline size_t vringh_kiov_length(struct vringh_kiov *kiov)
{
	size_t total = 0;
	int idx = kiov->i;

	while (idx < kiov->used) {
		total += kiov->iov[idx].iov_len;
		idx++;
	}

	return total;
}
void vringh_kiov_advance(struct vringh_kiov *kiov, size_t len);
int vringh_getdesc_kern(struct vringh *vrh,
struct vringh_kiov *riov,
struct vringh_kiov *wiov,
......@@ -258,7 +274,8 @@ static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val)
#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb);
void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
spinlock_t *iotlb_lock);
int vringh_init_iotlb(struct vringh *vrh, u64 features,
unsigned int num, bool weak_barriers,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment