Commit 9bb71526 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:

 - Some bug fixes

 - The new vdpa subsystem with two first drivers

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  virtio-balloon: Revert "virtio-balloon: Switch back to OOM handler for VIRTIO_BALLOON_F_DEFLATE_ON_OOM"
  vdpa: move to drivers/vdpa
  virtio: Intel IFC VF driver for VDPA
  vdpasim: vDPA device simulator
  vhost: introduce vDPA-based backend
  virtio: introduce a vDPA based transport
  vDPA: introduce vDPA bus
  vringh: IOTLB support
  vhost: factor out IOTLB
  vhost: allow per device message handler
  vhost: refine vhost and vringh kconfig
  virtio-balloon: Switch back to OOM handler for VIRTIO_BALLOON_F_DEFLATE_ON_OOM
  virtio-net: Introduce hash report feature
  virtio-net: Introduce RSS receive steering feature
  virtio-net: Introduce extended RSC feature
  tools/virtio: option to build an out of tree module
parents ae46d2aa 835a6a64
......@@ -17870,10 +17870,12 @@ L: virtualization@lists.linux-foundation.org
S: Maintained
F: Documentation/devicetree/bindings/virtio/
F: drivers/virtio/
F: drivers/vdpa/
F: tools/virtio/
F: drivers/net/virtio_net.c
F: drivers/block/virtio_blk.c
F: include/linux/virtio*.h
F: include/linux/vdpa.h
F: include/uapi/linux/virtio_*.h
F: drivers/crypto/virtio/
F: mm/balloon_compaction.c
......@@ -17941,6 +17943,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git
S: Maintained
F: drivers/vhost/
F: include/uapi/linux/vhost.h
F: include/linux/vhost_iotlb.h
VIRTIO INPUT DRIVER
M: Gerd Hoffmann <kraxel@redhat.com>
......
......@@ -64,6 +64,4 @@ config KVM_ARM_PMU
config KVM_INDIRECT_VECTORS
def_bool KVM && (HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS)
source "drivers/vhost/Kconfig"
endif # VIRTUALIZATION
......@@ -72,6 +72,4 @@ config KVM_MIPS_DEBUG_COP0_COUNTERS
If unsure, say N.
source "drivers/vhost/Kconfig"
endif # VIRTUALIZATION
......@@ -204,6 +204,4 @@ config KVM_XIVE
default y
depends on KVM_XICS && PPC_XIVE_NATIVE && KVM_BOOK3S_HV_POSSIBLE
source "drivers/vhost/Kconfig"
endif # VIRTUALIZATION
......@@ -55,8 +55,4 @@ config KVM_S390_UCONTROL
If unsure, say N.
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
source "drivers/vhost/Kconfig"
endif # VIRTUALIZATION
......@@ -107,8 +107,4 @@ config KVM_MMU_AUDIT
This option adds a R/W kVM module parameter 'mmu_audit', which allows
auditing of KVM MMU events at runtime.
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
source "drivers/vhost/Kconfig"
endif # VIRTUALIZATION
......@@ -138,6 +138,10 @@ source "drivers/virt/Kconfig"
source "drivers/virtio/Kconfig"
source "drivers/vdpa/Kconfig"
source "drivers/vhost/Kconfig"
source "drivers/hv/Kconfig"
source "drivers/xen/Kconfig"
......
......@@ -42,6 +42,7 @@ obj-$(CONFIG_DMADEVICES) += dma/
obj-y += soc/
obj-$(CONFIG_VIRTIO) += virtio/
obj-$(CONFIG_VDPA) += vdpa/
obj-$(CONFIG_XEN) += xen/
# regulators early, since some subsystems rely on them to initialize
......
......@@ -133,8 +133,4 @@ config VOP
OS and tools for MIC to use with this driver are available from
<http://software.intel.com/en-us/mic-developer>.
if VOP
source "drivers/vhost/Kconfig.vringh"
endif
endmenu
......@@ -58,8 +58,4 @@ config CAIF_VIRTIO
---help---
The CAIF driver for CAIF over Virtio.
if CAIF_VIRTIO
source "drivers/vhost/Kconfig.vringh"
endif
endif # CAIF_DRIVERS
# SPDX-License-Identifier: GPL-2.0-only
config VDPA
tristate
help
Enable this module to support vDPA device that uses a
datapath which complies with virtio specifications with
vendor specific control path.
menuconfig VDPA_MENU
bool "VDPA drivers"
default n
if VDPA_MENU
config VDPA_SIM
tristate "vDPA device simulator"
depends on RUNTIME_TESTING_MENU
select VDPA
select VHOST_RING
default n
help
vDPA networking device simulator which loop TX traffic back
to RX. This device is used for testing, prototyping and
development of vDPA.
config IFCVF
tristate "Intel IFC VF VDPA driver"
depends on PCI_MSI
select VDPA
default n
help
This kernel module can drive Intel IFC VF NIC to offload
virtio dataplane traffic to hardware.
To compile this driver as a module, choose M here: the module will
be called ifcvf.
endif # VDPA_MENU
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_VDPA) += vdpa.o
obj-$(CONFIG_VDPA_SIM) += vdpa_sim/
obj-$(CONFIG_IFCVF) += ifcvf/
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_IFCVF) += ifcvf.o
ifcvf-$(CONFIG_IFCVF) += ifcvf_main.o ifcvf_base.o
// SPDX-License-Identifier: GPL-2.0-only
/*
* Intel IFC VF NIC driver for virtio dataplane offloading
*
* Copyright (C) 2020 Intel Corporation.
*
* Author: Zhu Lingshan <lingshan.zhu@intel.com>
*
*/
#include "ifcvf_base.h"
static inline u8 ifc_ioread8(u8 __iomem *addr)
{
return ioread8(addr);
}
static inline u16 ifc_ioread16 (__le16 __iomem *addr)
{
return ioread16(addr);
}
static inline u32 ifc_ioread32(__le32 __iomem *addr)
{
return ioread32(addr);
}
static inline void ifc_iowrite8(u8 value, u8 __iomem *addr)
{
iowrite8(value, addr);
}
static inline void ifc_iowrite16(u16 value, __le16 __iomem *addr)
{
iowrite16(value, addr);
}
static inline void ifc_iowrite32(u32 value, __le32 __iomem *addr)
{
iowrite32(value, addr);
}
static void ifc_iowrite64_twopart(u64 val,
__le32 __iomem *lo, __le32 __iomem *hi)
{
ifc_iowrite32((u32)val, lo);
ifc_iowrite32(val >> 32, hi);
}
struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw)
{
return container_of(hw, struct ifcvf_adapter, vf);
}
static void __iomem *get_cap_addr(struct ifcvf_hw *hw,
struct virtio_pci_cap *cap)
{
struct ifcvf_adapter *ifcvf;
struct pci_dev *pdev;
u32 length, offset;
u8 bar;
length = le32_to_cpu(cap->length);
offset = le32_to_cpu(cap->offset);
bar = cap->bar;
ifcvf= vf_to_adapter(hw);
pdev = ifcvf->pdev;
if (bar >= IFCVF_PCI_MAX_RESOURCE) {
IFCVF_DBG(pdev,
"Invalid bar number %u to get capabilities\n", bar);
return NULL;
}
if (offset + length > pci_resource_len(pdev, bar)) {
IFCVF_DBG(pdev,
"offset(%u) + len(%u) overflows bar%u's capability\n",
offset, length, bar);
return NULL;
}
return hw->base[bar] + offset;
}
static int ifcvf_read_config_range(struct pci_dev *dev,
uint32_t *val, int size, int where)
{
int ret, i;
for (i = 0; i < size; i += 4) {
ret = pci_read_config_dword(dev, where + i, val + i / 4);
if (ret < 0)
return ret;
}
return 0;
}
int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev)
{
struct virtio_pci_cap cap;
u16 notify_off;
int ret;
u8 pos;
u32 i;
ret = pci_read_config_byte(pdev, PCI_CAPABILITY_LIST, &pos);
if (ret < 0) {
IFCVF_ERR(pdev, "Failed to read PCI capability list\n");
return -EIO;
}
while (pos) {
ret = ifcvf_read_config_range(pdev, (u32 *)&cap,
sizeof(cap), pos);
if (ret < 0) {
IFCVF_ERR(pdev,
"Failed to get PCI capability at %x\n", pos);
break;
}
if (cap.cap_vndr != PCI_CAP_ID_VNDR)
goto next;
switch (cap.cfg_type) {
case VIRTIO_PCI_CAP_COMMON_CFG:
hw->common_cfg = get_cap_addr(hw, &cap);
IFCVF_DBG(pdev, "hw->common_cfg = %p\n",
hw->common_cfg);
break;
case VIRTIO_PCI_CAP_NOTIFY_CFG:
pci_read_config_dword(pdev, pos + sizeof(cap),
&hw->notify_off_multiplier);
hw->notify_bar = cap.bar;
hw->notify_base = get_cap_addr(hw, &cap);
IFCVF_DBG(pdev, "hw->notify_base = %p\n",
hw->notify_base);
break;
case VIRTIO_PCI_CAP_ISR_CFG:
hw->isr = get_cap_addr(hw, &cap);
IFCVF_DBG(pdev, "hw->isr = %p\n", hw->isr);
break;
case VIRTIO_PCI_CAP_DEVICE_CFG:
hw->net_cfg = get_cap_addr(hw, &cap);
IFCVF_DBG(pdev, "hw->net_cfg = %p\n", hw->net_cfg);
break;
}
next:
pos = cap.cap_next;
}
if (hw->common_cfg == NULL || hw->notify_base == NULL ||
hw->isr == NULL || hw->net_cfg == NULL) {
IFCVF_ERR(pdev, "Incomplete PCI capabilities\n");
return -EIO;
}
for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
ifc_iowrite16(i, &hw->common_cfg->queue_select);
notify_off = ifc_ioread16(&hw->common_cfg->queue_notify_off);
hw->vring[i].notify_addr = hw->notify_base +
notify_off * hw->notify_off_multiplier;
}
hw->lm_cfg = hw->base[IFCVF_LM_BAR];
IFCVF_DBG(pdev,
"PCI capability mapping: common cfg: %p, notify base: %p\n, isr cfg: %p, device cfg: %p, multiplier: %u\n",
hw->common_cfg, hw->notify_base, hw->isr,
hw->net_cfg, hw->notify_off_multiplier);
return 0;
}
u8 ifcvf_get_status(struct ifcvf_hw *hw)
{
return ifc_ioread8(&hw->common_cfg->device_status);
}
void ifcvf_set_status(struct ifcvf_hw *hw, u8 status)
{
ifc_iowrite8(status, &hw->common_cfg->device_status);
}
void ifcvf_reset(struct ifcvf_hw *hw)
{
ifcvf_set_status(hw, 0);
/* flush set_status, make sure VF is stopped, reset */
ifcvf_get_status(hw);
}
static void ifcvf_add_status(struct ifcvf_hw *hw, u8 status)
{
if (status != 0)
status |= ifcvf_get_status(hw);
ifcvf_set_status(hw, status);
ifcvf_get_status(hw);
}
u64 ifcvf_get_features(struct ifcvf_hw *hw)
{
struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
u32 features_lo, features_hi;
ifc_iowrite32(0, &cfg->device_feature_select);
features_lo = ifc_ioread32(&cfg->device_feature);
ifc_iowrite32(1, &cfg->device_feature_select);
features_hi = ifc_ioread32(&cfg->device_feature);
return ((u64)features_hi << 32) | features_lo;
}
void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset,
void *dst, int length)
{
u8 old_gen, new_gen, *p;
int i;
WARN_ON(offset + length > sizeof(struct virtio_net_config));
do {
old_gen = ifc_ioread8(&hw->common_cfg->config_generation);
p = dst;
for (i = 0; i < length; i++)
*p++ = ifc_ioread8(hw->net_cfg + offset + i);
new_gen = ifc_ioread8(&hw->common_cfg->config_generation);
} while (old_gen != new_gen);
}
void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset,
const void *src, int length)
{
const u8 *p;
int i;
p = src;
WARN_ON(offset + length > sizeof(struct virtio_net_config));
for (i = 0; i < length; i++)
ifc_iowrite8(*p++, hw->net_cfg + offset + i);
}
static void ifcvf_set_features(struct ifcvf_hw *hw, u64 features)
{
struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
ifc_iowrite32(0, &cfg->guest_feature_select);
ifc_iowrite32((u32)features, &cfg->guest_feature);
ifc_iowrite32(1, &cfg->guest_feature_select);
ifc_iowrite32(features >> 32, &cfg->guest_feature);
}
static int ifcvf_config_features(struct ifcvf_hw *hw)
{
struct ifcvf_adapter *ifcvf;
ifcvf = vf_to_adapter(hw);
ifcvf_set_features(hw, hw->req_features);
ifcvf_add_status(hw, VIRTIO_CONFIG_S_FEATURES_OK);
if (!(ifcvf_get_status(hw) & VIRTIO_CONFIG_S_FEATURES_OK)) {
IFCVF_ERR(ifcvf->pdev, "Failed to set FEATURES_OK status\n");
return -EIO;
}
return 0;
}
u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid)
{
struct ifcvf_lm_cfg __iomem *ifcvf_lm;
void __iomem *avail_idx_addr;
u16 last_avail_idx;
u32 q_pair_id;
ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2);
avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2];
last_avail_idx = ifc_ioread16(avail_idx_addr);
return last_avail_idx;
}
int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u64 num)
{
struct ifcvf_lm_cfg __iomem *ifcvf_lm;
void __iomem *avail_idx_addr;
u32 q_pair_id;
ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2);
avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2];
hw->vring[qid].last_avail_idx = num;
ifc_iowrite16(num, avail_idx_addr);
return 0;
}
static int ifcvf_hw_enable(struct ifcvf_hw *hw)
{
struct ifcvf_lm_cfg __iomem *ifcvf_lm;
struct virtio_pci_common_cfg __iomem *cfg;
struct ifcvf_adapter *ifcvf;
u32 i;
ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
ifcvf = vf_to_adapter(hw);
cfg = hw->common_cfg;
ifc_iowrite16(IFCVF_MSI_CONFIG_OFF, &cfg->msix_config);
if (ifc_ioread16(&cfg->msix_config) == VIRTIO_MSI_NO_VECTOR) {
IFCVF_ERR(ifcvf->pdev, "No msix vector for device config\n");
return -EINVAL;
}
for (i = 0; i < hw->nr_vring; i++) {
if (!hw->vring[i].ready)
break;
ifc_iowrite16(i, &cfg->queue_select);
ifc_iowrite64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
&cfg->queue_desc_hi);
ifc_iowrite64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo,
&cfg->queue_avail_hi);
ifc_iowrite64_twopart(hw->vring[i].used, &cfg->queue_used_lo,
&cfg->queue_used_hi);
ifc_iowrite16(hw->vring[i].size, &cfg->queue_size);
ifc_iowrite16(i + IFCVF_MSI_QUEUE_OFF, &cfg->queue_msix_vector);
if (ifc_ioread16(&cfg->queue_msix_vector) ==
VIRTIO_MSI_NO_VECTOR) {
IFCVF_ERR(ifcvf->pdev,
"No msix vector for queue %u\n", i);
return -EINVAL;
}
ifcvf_set_vq_state(hw, i, hw->vring[i].last_avail_idx);
ifc_iowrite16(1, &cfg->queue_enable);
}
return 0;
}
static void ifcvf_hw_disable(struct ifcvf_hw *hw)
{
struct virtio_pci_common_cfg __iomem *cfg;
u32 i;
cfg = hw->common_cfg;
ifc_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->msix_config);
for (i = 0; i < hw->nr_vring; i++) {
ifc_iowrite16(i, &cfg->queue_select);
ifc_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->queue_msix_vector);
}
ifc_ioread16(&cfg->queue_msix_vector);
}
int ifcvf_start_hw(struct ifcvf_hw *hw)
{
ifcvf_reset(hw);
ifcvf_add_status(hw, VIRTIO_CONFIG_S_ACKNOWLEDGE);
ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER);
if (ifcvf_config_features(hw) < 0)
return -EINVAL;
if (ifcvf_hw_enable(hw) < 0)
return -EINVAL;
ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER_OK);
return 0;
}
void ifcvf_stop_hw(struct ifcvf_hw *hw)
{
ifcvf_hw_disable(hw);
ifcvf_reset(hw);
}
void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid)
{
ifc_iowrite16(qid, hw->vring[qid].notify_addr);
}
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Intel IFC VF NIC driver for virtio dataplane offloading
*
* Copyright (C) 2020 Intel Corporation.
*
* Author: Zhu Lingshan <lingshan.zhu@intel.com>
*
*/
#ifndef _IFCVF_H_
#define _IFCVF_H_
#include <linux/pci.h>
#include <linux/pci_regs.h>
#include <linux/vdpa.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_pci.h>
#define IFCVF_VENDOR_ID 0x1AF4
#define IFCVF_DEVICE_ID 0x1041
#define IFCVF_SUBSYS_VENDOR_ID 0x8086
#define IFCVF_SUBSYS_DEVICE_ID 0x001A
#define IFCVF_SUPPORTED_FEATURES \
((1ULL << VIRTIO_NET_F_MAC) | \
(1ULL << VIRTIO_F_ANY_LAYOUT) | \
(1ULL << VIRTIO_F_VERSION_1) | \
(1ULL << VIRTIO_F_ORDER_PLATFORM) | \
(1ULL << VIRTIO_F_IOMMU_PLATFORM) | \
(1ULL << VIRTIO_NET_F_MRG_RXBUF))
/* Only one queue pair for now. */
#define IFCVF_MAX_QUEUE_PAIRS 1
#define IFCVF_QUEUE_ALIGNMENT PAGE_SIZE
#define IFCVF_QUEUE_MAX 32768
#define IFCVF_MSI_CONFIG_OFF 0
#define IFCVF_MSI_QUEUE_OFF 1
#define IFCVF_PCI_MAX_RESOURCE 6
#define IFCVF_LM_CFG_SIZE 0x40
#define IFCVF_LM_RING_STATE_OFFSET 0x20
#define IFCVF_LM_BAR 4
#define IFCVF_ERR(pdev, fmt, ...) dev_err(&pdev->dev, fmt, ##__VA_ARGS__)
#define IFCVF_DBG(pdev, fmt, ...) dev_dbg(&pdev->dev, fmt, ##__VA_ARGS__)
#define IFCVF_INFO(pdev, fmt, ...) dev_info(&pdev->dev, fmt, ##__VA_ARGS__)
#define ifcvf_private_to_vf(adapter) \
(&((struct ifcvf_adapter *)adapter)->vf)
#define IFCVF_MAX_INTR (IFCVF_MAX_QUEUE_PAIRS * 2 + 1)
struct vring_info {
u64 desc;
u64 avail;
u64 used;
u16 size;
u16 last_avail_idx;
bool ready;
void __iomem *notify_addr;
u32 irq;
struct vdpa_callback cb;
char msix_name[256];
};
struct ifcvf_hw {
u8 __iomem *isr;
/* Live migration */
u8 __iomem *lm_cfg;
u16 nr_vring;
/* Notification bar number */
u8 notify_bar;
/* Notificaiton bar address */
void __iomem *notify_base;
u32 notify_off_multiplier;
u64 req_features;
struct virtio_pci_common_cfg __iomem *common_cfg;
void __iomem *net_cfg;
struct vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2];
void __iomem * const *base;
};
struct ifcvf_adapter {
struct vdpa_device vdpa;
struct pci_dev *pdev;
struct ifcvf_hw vf;
};
struct ifcvf_vring_lm_cfg {
u32 idx_addr[2];
u8 reserved[IFCVF_LM_CFG_SIZE - 8];
};
struct ifcvf_lm_cfg {
u8 reserved[IFCVF_LM_RING_STATE_OFFSET];
struct ifcvf_vring_lm_cfg vring_lm_cfg[IFCVF_MAX_QUEUE_PAIRS];
};
int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *dev);
int ifcvf_start_hw(struct ifcvf_hw *hw);
void ifcvf_stop_hw(struct ifcvf_hw *hw);
void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid);
void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset,
void *dst, int length);
void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset,
const void *src, int length);
u8 ifcvf_get_status(struct ifcvf_hw *hw);
void ifcvf_set_status(struct ifcvf_hw *hw, u8 status);
void io_write64_twopart(u64 val, u32 *lo, u32 *hi);
void ifcvf_reset(struct ifcvf_hw *hw);
u64 ifcvf_get_features(struct ifcvf_hw *hw);
u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid);
int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u64 num);
struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw);
#endif /* _IFCVF_H_ */
This diff is collapsed.
// SPDX-License-Identifier: GPL-2.0-only
/*
* vDPA bus.
*
* Copyright (c) 2020, Red Hat. All rights reserved.
* Author: Jason Wang <jasowang@redhat.com>
*
*/
#include <linux/module.h>
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/vdpa.h>
static DEFINE_IDA(vdpa_index_ida);
static int vdpa_dev_probe(struct device *d)
{
struct vdpa_device *vdev = dev_to_vdpa(d);
struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver);
int ret = 0;
if (drv && drv->probe)
ret = drv->probe(vdev);
return ret;
}
static int vdpa_dev_remove(struct device *d)
{
struct vdpa_device *vdev = dev_to_vdpa(d);
struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver);
if (drv && drv->remove)
drv->remove(vdev);
return 0;
}
static struct bus_type vdpa_bus = {
.name = "vdpa",
.probe = vdpa_dev_probe,
.remove = vdpa_dev_remove,
};
static void vdpa_release_dev(struct device *d)
{
struct vdpa_device *vdev = dev_to_vdpa(d);
const struct vdpa_config_ops *ops = vdev->config;
if (ops->free)
ops->free(vdev);
ida_simple_remove(&vdpa_index_ida, vdev->index);
kfree(vdev);
}
/**
* __vdpa_alloc_device - allocate and initilaize a vDPA device
* This allows driver to some prepartion after device is
* initialized but before registered.
* @parent: the parent device
* @config: the bus operations that is supported by this device
* @size: size of the parent structure that contains private data
*
* Drvier should use vdap_alloc_device() wrapper macro instead of
* using this directly.
*
* Returns an error when parent/config/dma_dev is not set or fail to get
* ida.
*/
struct vdpa_device *__vdpa_alloc_device(struct device *parent,
const struct vdpa_config_ops *config,
size_t size)
{
struct vdpa_device *vdev;
int err = -EINVAL;
if (!config)
goto err;
if (!!config->dma_map != !!config->dma_unmap)
goto err;
err = -ENOMEM;
vdev = kzalloc(size, GFP_KERNEL);
if (!vdev)
goto err;
err = ida_simple_get(&vdpa_index_ida, 0, 0, GFP_KERNEL);
if (err < 0)
goto err_ida;
vdev->dev.bus = &vdpa_bus;
vdev->dev.parent = parent;
vdev->dev.release = vdpa_release_dev;
vdev->index = err;
vdev->config = config;
err = dev_set_name(&vdev->dev, "vdpa%u", vdev->index);
if (err)
goto err_name;
device_initialize(&vdev->dev);
return vdev;
err_name:
ida_simple_remove(&vdpa_index_ida, vdev->index);
err_ida:
kfree(vdev);
err:
return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(__vdpa_alloc_device);
/**
* vdpa_register_device - register a vDPA device
* Callers must have a succeed call of vdpa_init_device() before.
* @vdev: the vdpa device to be registered to vDPA bus
*
* Returns an error when fail to add to vDPA bus
*/
int vdpa_register_device(struct vdpa_device *vdev)
{
return device_add(&vdev->dev);
}
EXPORT_SYMBOL_GPL(vdpa_register_device);
/**
* vdpa_unregister_device - unregister a vDPA device
* @vdev: the vdpa device to be unregisted from vDPA bus
*/
void vdpa_unregister_device(struct vdpa_device *vdev)
{
device_unregister(&vdev->dev);
}
EXPORT_SYMBOL_GPL(vdpa_unregister_device);
/**
* __vdpa_register_driver - register a vDPA device driver
* @drv: the vdpa device driver to be registered
* @owner: module owner of the driver
*
* Returns an err when fail to do the registration
*/
int __vdpa_register_driver(struct vdpa_driver *drv, struct module *owner)
{
drv->driver.bus = &vdpa_bus;
drv->driver.owner = owner;
return driver_register(&drv->driver);
}
EXPORT_SYMBOL_GPL(__vdpa_register_driver);
/**
* vdpa_unregister_driver - unregister a vDPA device driver
* @drv: the vdpa device driver to be unregistered
*/
void vdpa_unregister_driver(struct vdpa_driver *drv)
{
driver_unregister(&drv->driver);
}
EXPORT_SYMBOL_GPL(vdpa_unregister_driver);
static int vdpa_init(void)
{
return bus_register(&vdpa_bus);
}
static void __exit vdpa_exit(void)
{
bus_unregister(&vdpa_bus);
ida_destroy(&vdpa_index_ida);
}
core_initcall(vdpa_init);
module_exit(vdpa_exit);
MODULE_AUTHOR("Jason Wang <jasowang@redhat.com>");
MODULE_LICENSE("GPL v2");
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o
This diff is collapsed.
# SPDX-License-Identifier: GPL-2.0-only
config VHOST_IOTLB
tristate
help
Generic IOTLB implementation for vhost and vringh.
config VHOST_RING
tristate
select VHOST_IOTLB
help
This option is selected by any driver which needs to access
the host side of a virtio ring.
config VHOST
tristate
select VHOST_IOTLB
help
This option is selected by any driver which needs to access
the core of vhost.
menuconfig VHOST_MENU
bool "VHOST drivers"
default y
if VHOST_MENU
config VHOST_NET
tristate "Host kernel accelerator for virtio net"
depends on NET && EVENTFD && (TUN || !TUN) && (TAP || !TAP)
......@@ -23,8 +48,8 @@ config VHOST_SCSI
config VHOST_VSOCK
tristate "vhost virtio-vsock driver"
depends on VSOCKETS && EVENTFD
select VIRTIO_VSOCKETS_COMMON
select VHOST
select VIRTIO_VSOCKETS_COMMON
default n
---help---
This kernel module can be loaded in the host kernel to provide AF_VSOCK
......@@ -34,11 +59,17 @@ config VHOST_VSOCK
To compile this driver as a module, choose M here: the module will be called
vhost_vsock.
config VHOST
tristate
---help---
This option is selected by any driver which needs to access
the core of vhost.
config VHOST_VDPA
tristate "Vhost driver for vDPA-based backend"
depends on EVENTFD
select VHOST
select VDPA
help
This kernel module can be loaded in host kernel to accelerate
guest virtio devices with the vDPA-based backends.
To compile this driver as a module, choose M here: the module
will be called vhost_vdpa.
config VHOST_CROSS_ENDIAN_LEGACY
bool "Cross-endian support for vhost"
......@@ -54,3 +85,5 @@ config VHOST_CROSS_ENDIAN_LEGACY
adds some overhead, it is disabled by default.
If unsure, say "N".
endif
# SPDX-License-Identifier: GPL-2.0-only
config VHOST_RING
tristate
---help---
This option is selected by any driver which needs to access
the host side of a virtio ring.
......@@ -10,4 +10,10 @@ vhost_vsock-y := vsock.o
obj-$(CONFIG_VHOST_RING) += vringh.o
obj-$(CONFIG_VHOST_VDPA) += vhost_vdpa.o
vhost_vdpa-y := vdpa.o
obj-$(CONFIG_VHOST) += vhost.o
obj-$(CONFIG_VHOST_IOTLB) += vhost_iotlb.o
vhost_iotlb-y := iotlb.o
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2020 Red Hat, Inc.
* Author: Jason Wang <jasowang@redhat.com>
*
* IOTLB implementation for vhost.
*/
#include <linux/slab.h>
#include <linux/vhost_iotlb.h>
#include <linux/module.h>
#define MOD_VERSION "0.1"
#define MOD_DESC "VHOST IOTLB"
#define MOD_AUTHOR "Jason Wang <jasowang@redhat.com>"
#define MOD_LICENSE "GPL v2"
#define START(map) ((map)->start)
#define LAST(map) ((map)->last)
INTERVAL_TREE_DEFINE(struct vhost_iotlb_map,
rb, __u64, __subtree_last,
START, LAST, static inline, vhost_iotlb_itree);
/**
* vhost_iotlb_map_free - remove a map node and free it
* @iotlb: the IOTLB
* @map: the map that want to be remove and freed
*/
void vhost_iotlb_map_free(struct vhost_iotlb *iotlb,
struct vhost_iotlb_map *map)
{
vhost_iotlb_itree_remove(map, &iotlb->root);
list_del(&map->link);
kfree(map);
iotlb->nmaps--;
}
EXPORT_SYMBOL_GPL(vhost_iotlb_map_free);
/**
* vhost_iotlb_add_range - add a new range to vhost IOTLB
* @iotlb: the IOTLB
* @start: start of the IOVA range
* @last: last of IOVA range
* @addr: the address that is mapped to @start
* @perm: access permission of this range
*
* Returns an error last is smaller than start or memory allocation
* fails
*/
int vhost_iotlb_add_range(struct vhost_iotlb *iotlb,
u64 start, u64 last,
u64 addr, unsigned int perm)
{
struct vhost_iotlb_map *map;
if (last < start)
return -EFAULT;
if (iotlb->limit &&
iotlb->nmaps == iotlb->limit &&
iotlb->flags & VHOST_IOTLB_FLAG_RETIRE) {
map = list_first_entry(&iotlb->list, typeof(*map), link);
vhost_iotlb_map_free(iotlb, map);
}
map = kmalloc(sizeof(*map), GFP_ATOMIC);
if (!map)
return -ENOMEM;
map->start = start;
map->size = last - start + 1;
map->last = last;
map->addr = addr;
map->perm = perm;
iotlb->nmaps++;
vhost_iotlb_itree_insert(map, &iotlb->root);
INIT_LIST_HEAD(&map->link);
list_add_tail(&map->link, &iotlb->list);
return 0;
}
EXPORT_SYMBOL_GPL(vhost_iotlb_add_range);
/**
* vring_iotlb_del_range - delete overlapped ranges from vhost IOTLB
* @iotlb: the IOTLB
* @start: start of the IOVA range
* @last: last of IOVA range
*/
void vhost_iotlb_del_range(struct vhost_iotlb *iotlb, u64 start, u64 last)
{
struct vhost_iotlb_map *map;
while ((map = vhost_iotlb_itree_iter_first(&iotlb->root,
start, last)))
vhost_iotlb_map_free(iotlb, map);
}
EXPORT_SYMBOL_GPL(vhost_iotlb_del_range);
/**
* vhost_iotlb_alloc - add a new vhost IOTLB
* @limit: maximum number of IOTLB entries
* @flags: VHOST_IOTLB_FLAG_XXX
*
* Returns an error is memory allocation fails
*/
struct vhost_iotlb *vhost_iotlb_alloc(unsigned int limit, unsigned int flags)
{
struct vhost_iotlb *iotlb = kzalloc(sizeof(*iotlb), GFP_KERNEL);
if (!iotlb)
return NULL;
iotlb->root = RB_ROOT_CACHED;
iotlb->limit = limit;
iotlb->nmaps = 0;
iotlb->flags = flags;
INIT_LIST_HEAD(&iotlb->list);
return iotlb;
}
EXPORT_SYMBOL_GPL(vhost_iotlb_alloc);
/**
* vhost_iotlb_reset - reset vhost IOTLB (free all IOTLB entries)
* @iotlb: the IOTLB to be reset
*/
void vhost_iotlb_reset(struct vhost_iotlb *iotlb)
{
vhost_iotlb_del_range(iotlb, 0ULL, 0ULL - 1);
}
EXPORT_SYMBOL_GPL(vhost_iotlb_reset);
/**
* vhost_iotlb_free - reset and free vhost IOTLB
* @iotlb: the IOTLB to be freed
*/
void vhost_iotlb_free(struct vhost_iotlb *iotlb)
{
if (iotlb) {
vhost_iotlb_reset(iotlb);
kfree(iotlb);
}
}
EXPORT_SYMBOL_GPL(vhost_iotlb_free);
/**
* vhost_iotlb_itree_first - return the first overlapped range
* @iotlb: the IOTLB
* @start: start of IOVA range
* @end: end of IOVA range
*/
struct vhost_iotlb_map *
vhost_iotlb_itree_first(struct vhost_iotlb *iotlb, u64 start, u64 last)
{
return vhost_iotlb_itree_iter_first(&iotlb->root, start, last);
}
EXPORT_SYMBOL_GPL(vhost_iotlb_itree_first);
/**
* vhost_iotlb_itree_first - return the next overlapped range
* @iotlb: the IOTLB
* @start: start of IOVA range
* @end: end of IOVA range
*/
struct vhost_iotlb_map *
vhost_iotlb_itree_next(struct vhost_iotlb_map *map, u64 start, u64 last)
{
return vhost_iotlb_itree_iter_next(map, start, last);
}
EXPORT_SYMBOL_GPL(vhost_iotlb_itree_next);
MODULE_VERSION(MOD_VERSION);
MODULE_DESCRIPTION(MOD_DESC);
MODULE_AUTHOR(MOD_AUTHOR);
MODULE_LICENSE(MOD_LICENSE);
......@@ -1324,7 +1324,8 @@ static int vhost_net_open(struct inode *inode, struct file *f)
}
vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
UIO_MAXIOV + VHOST_NET_BATCH,
VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT);
VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT,
NULL);
vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev);
vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev);
......@@ -1586,7 +1587,7 @@ static long vhost_net_reset_owner(struct vhost_net *n)
struct socket *tx_sock = NULL;
struct socket *rx_sock = NULL;
long err;
struct vhost_umem *umem;
struct vhost_iotlb *umem;
mutex_lock(&n->dev.mutex);
err = vhost_dev_check_owner(&n->dev);
......
......@@ -1628,7 +1628,7 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
}
vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV,
VHOST_SCSI_WEIGHT, 0);
VHOST_SCSI_WEIGHT, 0, NULL);
vhost_scsi_init_inflight(vs, NULL);
......
This diff is collapsed.
This diff is collapsed.
......@@ -12,6 +12,7 @@
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/atomic.h>
#include <linux/vhost_iotlb.h>
struct vhost_work;
typedef void (*vhost_work_fn_t)(struct vhost_work *work);
......@@ -52,27 +53,6 @@ struct vhost_log {
u64 len;
};
#define START(node) ((node)->start)
#define LAST(node) ((node)->last)
struct vhost_umem_node {
struct rb_node rb;
struct list_head link;
__u64 start;
__u64 last;
__u64 size;
__u64 userspace_addr;
__u32 perm;
__u32 flags_padding;
__u64 __subtree_last;
};
struct vhost_umem {
struct rb_root_cached umem_tree;
struct list_head umem_list;
int numem;
};
enum vhost_uaddr_type {
VHOST_ADDR_DESC = 0,
VHOST_ADDR_AVAIL = 1,
......@@ -90,7 +70,7 @@ struct vhost_virtqueue {
struct vring_desc __user *desc;
struct vring_avail __user *avail;
struct vring_used __user *used;
const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
const struct vhost_iotlb_map *meta_iotlb[VHOST_NUM_ADDRS];
struct file *kick;
struct eventfd_ctx *call_ctx;
struct eventfd_ctx *error_ctx;
......@@ -128,8 +108,8 @@ struct vhost_virtqueue {
struct iovec *indirect;
struct vring_used_elem *heads;
/* Protected by virtqueue mutex. */
struct vhost_umem *umem;
struct vhost_umem *iotlb;
struct vhost_iotlb *umem;
struct vhost_iotlb *iotlb;
void *private_data;
u64 acked_features;
u64 acked_backend_features;
......@@ -164,8 +144,8 @@ struct vhost_dev {
struct eventfd_ctx *log_ctx;
struct llist_head work_list;
struct task_struct *worker;
struct vhost_umem *umem;
struct vhost_umem *iotlb;
struct vhost_iotlb *umem;
struct vhost_iotlb *iotlb;
spinlock_t iotlb_lock;
struct list_head read_list;
struct list_head pending_list;
......@@ -174,16 +154,20 @@ struct vhost_dev {
int weight;
int byte_weight;
u64 kcov_handle;
int (*msg_handler)(struct vhost_dev *dev,
struct vhost_iotlb_msg *msg);
};
bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len);
void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs,
int nvqs, int iov_limit, int weight, int byte_weight);
int nvqs, int iov_limit, int weight, int byte_weight,
int (*msg_handler)(struct vhost_dev *dev,
struct vhost_iotlb_msg *msg));
long vhost_dev_set_owner(struct vhost_dev *dev);
bool vhost_dev_has_owner(struct vhost_dev *dev);
long vhost_dev_check_owner(struct vhost_dev *);
struct vhost_umem *vhost_dev_reset_owner_prepare(void);
void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_umem *);
struct vhost_iotlb *vhost_dev_reset_owner_prepare(void);
void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_iotlb *iotlb);
void vhost_dev_cleanup(struct vhost_dev *);
void vhost_dev_stop(struct vhost_dev *);
long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp);
......@@ -229,6 +213,9 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
struct iov_iter *from);
int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled);
void vhost_iotlb_map_free(struct vhost_iotlb *iotlb,
struct vhost_iotlb_map *map);
#define vq_err(vq, fmt, ...) do { \
pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
if ((vq)->error_ctx) \
......
This diff is collapsed.
......@@ -621,7 +621,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
VHOST_VSOCK_WEIGHT);
VHOST_VSOCK_WEIGHT, NULL);
file->private_data = vsock;
spin_lock_init(&vsock->send_pkt_list_lock);
......
......@@ -43,6 +43,19 @@ config VIRTIO_PCI_LEGACY
If unsure, say Y.
config VIRTIO_VDPA
tristate "vDPA driver for virtio devices"
select VDPA
select VIRTIO
help
This driver provides support for virtio based paravirtual
device driver over vDPA bus. For this to be useful, you need
an appropriate vDPA device implementation that operates on a
physical device to allow the datapath of virtio to be
offloaded to hardware.
If unsure, say M.
config VIRTIO_PMEM
tristate "Support for virtio pmem driver"
depends on VIRTIO
......
......@@ -6,3 +6,4 @@ virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
virtio_pci-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o
obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
obj-$(CONFIG_VIRTIO_INPUT) += virtio_input.o
obj-$(CONFIG_VIRTIO_VDPA) += virtio_vdpa.o
// SPDX-License-Identifier: GPL-2.0-only
/*
* VIRTIO based driver for vDPA device
*
* Copyright (c) 2020, Red Hat. All rights reserved.
* Author: Jason Wang <jasowang@redhat.com>
*
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uuid.h>
#include <linux/virtio.h>
#include <linux/vdpa.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#define MOD_VERSION "0.1"
#define MOD_AUTHOR "Jason Wang <jasowang@redhat.com>"
#define MOD_DESC "vDPA bus driver for virtio devices"
#define MOD_LICENSE "GPL v2"
struct virtio_vdpa_device {
struct virtio_device vdev;
struct vdpa_device *vdpa;
u64 features;
/* The lock to protect virtqueue list */
spinlock_t lock;
/* List of virtio_vdpa_vq_info */
struct list_head virtqueues;
};
struct virtio_vdpa_vq_info {
/* the actual virtqueue */
struct virtqueue *vq;
/* the list node for the virtqueues list */
struct list_head node;
};
static inline struct virtio_vdpa_device *
to_virtio_vdpa_device(struct virtio_device *dev)
{
return container_of(dev, struct virtio_vdpa_device, vdev);
}
static struct vdpa_device *vd_get_vdpa(struct virtio_device *vdev)
{
return to_virtio_vdpa_device(vdev)->vdpa;
}
static void virtio_vdpa_get(struct virtio_device *vdev, unsigned offset,
void *buf, unsigned len)
{
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
const struct vdpa_config_ops *ops = vdpa->config;
ops->get_config(vdpa, offset, buf, len);
}
static void virtio_vdpa_set(struct virtio_device *vdev, unsigned offset,
const void *buf, unsigned len)
{
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
const struct vdpa_config_ops *ops = vdpa->config;
ops->set_config(vdpa, offset, buf, len);
}
static u32 virtio_vdpa_generation(struct virtio_device *vdev)
{
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
const struct vdpa_config_ops *ops = vdpa->config;
if (ops->get_generation)
return ops->get_generation(vdpa);
return 0;
}
static u8 virtio_vdpa_get_status(struct virtio_device *vdev)
{
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
const struct vdpa_config_ops *ops = vdpa->config;
return ops->get_status(vdpa);
}
static void virtio_vdpa_set_status(struct virtio_device *vdev, u8 status)
{
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
const struct vdpa_config_ops *ops = vdpa->config;
return ops->set_status(vdpa, status);
}
static void virtio_vdpa_reset(struct virtio_device *vdev)
{
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
const struct vdpa_config_ops *ops = vdpa->config;
return ops->set_status(vdpa, 0);
}
static bool virtio_vdpa_notify(struct virtqueue *vq)
{
struct vdpa_device *vdpa = vd_get_vdpa(vq->vdev);
const struct vdpa_config_ops *ops = vdpa->config;
ops->kick_vq(vdpa, vq->index);
return true;
}
static irqreturn_t virtio_vdpa_config_cb(void *private)
{
struct virtio_vdpa_device *vd_dev = private;
virtio_config_changed(&vd_dev->vdev);
return IRQ_HANDLED;
}
static irqreturn_t virtio_vdpa_virtqueue_cb(void *private)
{
struct virtio_vdpa_vq_info *info = private;
return vring_interrupt(0, info->vq);
}
static struct virtqueue *
virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
void (*callback)(struct virtqueue *vq),
const char *name, bool ctx)
{
struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev);
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
const struct vdpa_config_ops *ops = vdpa->config;
struct virtio_vdpa_vq_info *info;
struct vdpa_callback cb;
struct virtqueue *vq;
u64 desc_addr, driver_addr, device_addr;
unsigned long flags;
u32 align, num;
int err;
if (!name)
return NULL;
/* Queue shouldn't already be set up. */
if (ops->get_vq_ready(vdpa, index))
return ERR_PTR(-ENOENT);
/* Allocate and fill out our active queue description */
info = kmalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return ERR_PTR(-ENOMEM);
num = ops->get_vq_num_max(vdpa);
if (num == 0) {
err = -ENOENT;
goto error_new_virtqueue;
}
/* Create the vring */
align = ops->get_vq_align(vdpa);
vq = vring_create_virtqueue(index, num, align, vdev,
true, true, ctx,
virtio_vdpa_notify, callback, name);
if (!vq) {
err = -ENOMEM;
goto error_new_virtqueue;
}
/* Setup virtqueue callback */
cb.callback = virtio_vdpa_virtqueue_cb;
cb.private = info;
ops->set_vq_cb(vdpa, index, &cb);
ops->set_vq_num(vdpa, index, virtqueue_get_vring_size(vq));
desc_addr = virtqueue_get_desc_addr(vq);
driver_addr = virtqueue_get_avail_addr(vq);
device_addr = virtqueue_get_used_addr(vq);
if (ops->set_vq_address(vdpa, index,
desc_addr, driver_addr,
device_addr)) {
err = -EINVAL;
goto err_vq;
}
ops->set_vq_ready(vdpa, index, 1);
vq->priv = info;
info->vq = vq;
spin_lock_irqsave(&vd_dev->lock, flags);
list_add(&info->node, &vd_dev->virtqueues);
spin_unlock_irqrestore(&vd_dev->lock, flags);
return vq;
err_vq:
vring_del_virtqueue(vq);
error_new_virtqueue:
ops->set_vq_ready(vdpa, index, 0);
/* VDPA driver should make sure vq is stopeed here */
WARN_ON(ops->get_vq_ready(vdpa, index));
kfree(info);
return ERR_PTR(err);
}
static void virtio_vdpa_del_vq(struct virtqueue *vq)
{
struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vq->vdev);
struct vdpa_device *vdpa = vd_dev->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
struct virtio_vdpa_vq_info *info = vq->priv;
unsigned int index = vq->index;
unsigned long flags;
spin_lock_irqsave(&vd_dev->lock, flags);
list_del(&info->node);
spin_unlock_irqrestore(&vd_dev->lock, flags);
/* Select and deactivate the queue */
ops->set_vq_ready(vdpa, index, 0);
WARN_ON(ops->get_vq_ready(vdpa, index));
vring_del_virtqueue(vq);
kfree(info);
}
static void virtio_vdpa_del_vqs(struct virtio_device *vdev)
{
struct virtqueue *vq, *n;
list_for_each_entry_safe(vq, n, &vdev->vqs, list)
virtio_vdpa_del_vq(vq);
}
static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char * const names[],
const bool *ctx,
struct irq_affinity *desc)
{
struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev);
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
const struct vdpa_config_ops *ops = vdpa->config;
struct vdpa_callback cb;
int i, err, queue_idx = 0;
for (i = 0; i < nvqs; ++i) {
if (!names[i]) {
vqs[i] = NULL;
continue;
}
vqs[i] = virtio_vdpa_setup_vq(vdev, queue_idx++,
callbacks[i], names[i], ctx ?
ctx[i] : false);
if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]);
goto err_setup_vq;
}
}
cb.callback = virtio_vdpa_config_cb;
cb.private = vd_dev;
ops->set_config_cb(vdpa, &cb);
return 0;
err_setup_vq:
virtio_vdpa_del_vqs(vdev);
return err;
}
static u64 virtio_vdpa_get_features(struct virtio_device *vdev)
{
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
const struct vdpa_config_ops *ops = vdpa->config;
return ops->get_features(vdpa);
}
static int virtio_vdpa_finalize_features(struct virtio_device *vdev)
{
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
const struct vdpa_config_ops *ops = vdpa->config;
/* Give virtio_ring a chance to accept features. */
vring_transport_features(vdev);
return ops->set_features(vdpa, vdev->features);
}
static const char *virtio_vdpa_bus_name(struct virtio_device *vdev)
{
struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev);
struct vdpa_device *vdpa = vd_dev->vdpa;
return dev_name(&vdpa->dev);
}
static const struct virtio_config_ops virtio_vdpa_config_ops = {
.get = virtio_vdpa_get,
.set = virtio_vdpa_set,
.generation = virtio_vdpa_generation,
.get_status = virtio_vdpa_get_status,
.set_status = virtio_vdpa_set_status,
.reset = virtio_vdpa_reset,
.find_vqs = virtio_vdpa_find_vqs,
.del_vqs = virtio_vdpa_del_vqs,
.get_features = virtio_vdpa_get_features,
.finalize_features = virtio_vdpa_finalize_features,
.bus_name = virtio_vdpa_bus_name,
};
static void virtio_vdpa_release_dev(struct device *_d)
{
struct virtio_device *vdev =
container_of(_d, struct virtio_device, dev);
struct virtio_vdpa_device *vd_dev =
container_of(vdev, struct virtio_vdpa_device, vdev);
kfree(vd_dev);
}
static int virtio_vdpa_probe(struct vdpa_device *vdpa)
{
const struct vdpa_config_ops *ops = vdpa->config;
struct virtio_vdpa_device *vd_dev, *reg_dev = NULL;
int ret = -EINVAL;
vd_dev = kzalloc(sizeof(*vd_dev), GFP_KERNEL);
if (!vd_dev)
return -ENOMEM;
vd_dev->vdev.dev.parent = vdpa_get_dma_dev(vdpa);
vd_dev->vdev.dev.release = virtio_vdpa_release_dev;
vd_dev->vdev.config = &virtio_vdpa_config_ops;
vd_dev->vdpa = vdpa;
INIT_LIST_HEAD(&vd_dev->virtqueues);
spin_lock_init(&vd_dev->lock);
vd_dev->vdev.id.device = ops->get_device_id(vdpa);
if (vd_dev->vdev.id.device == 0)
goto err;
vd_dev->vdev.id.vendor = ops->get_vendor_id(vdpa);
ret = register_virtio_device(&vd_dev->vdev);
reg_dev = vd_dev;
if (ret)
goto err;
vdpa_set_drvdata(vdpa, vd_dev);
return 0;
err:
if (reg_dev)
put_device(&vd_dev->vdev.dev);
else
kfree(vd_dev);
return ret;
}
static void virtio_vdpa_remove(struct vdpa_device *vdpa)
{
struct virtio_vdpa_device *vd_dev = vdpa_get_drvdata(vdpa);
unregister_virtio_device(&vd_dev->vdev);
}
static struct vdpa_driver virtio_vdpa_driver = {
.driver = {
.name = "virtio_vdpa",
},
.probe = virtio_vdpa_probe,
.remove = virtio_vdpa_remove,
};
module_vdpa_driver(virtio_vdpa_driver);
MODULE_VERSION(MOD_VERSION);
MODULE_LICENSE(MOD_LICENSE);
MODULE_AUTHOR(MOD_AUTHOR);
MODULE_DESCRIPTION(MOD_DESC);
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_VDPA_H
#define _LINUX_VDPA_H
#include <linux/kernel.h>
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/vhost_iotlb.h>
/**
* vDPA callback definition.
* @callback: interrupt callback function
* @private: the data passed to the callback function
*/
struct vdpa_callback {
irqreturn_t (*callback)(void *data);
void *private;
};
/**
* vDPA device - representation of a vDPA device
* @dev: underlying device
* @dma_dev: the actual device that is performing DMA
* @config: the configuration ops for this device.
* @index: device index
*/
struct vdpa_device {
struct device dev;
struct device *dma_dev;
const struct vdpa_config_ops *config;
unsigned int index;
};
/**
* vDPA_config_ops - operations for configuring a vDPA device.
* Note: vDPA device drivers are required to implement all of the
* operations unless it is mentioned to be optional in the following
* list.
*
* @set_vq_address: Set the address of virtqueue
* @vdev: vdpa device
* @idx: virtqueue index
* @desc_area: address of desc area
* @driver_area: address of driver area
* @device_area: address of device area
* Returns integer: success (0) or error (< 0)
* @set_vq_num: Set the size of virtqueue
* @vdev: vdpa device
* @idx: virtqueue index
* @num: the size of virtqueue
* @kick_vq: Kick the virtqueue
* @vdev: vdpa device
* @idx: virtqueue index
* @set_vq_cb: Set the interrupt callback function for
* a virtqueue
* @vdev: vdpa device
* @idx: virtqueue index
* @cb: virtio-vdev interrupt callback structure
* @set_vq_ready: Set ready status for a virtqueue
* @vdev: vdpa device
* @idx: virtqueue index
* @ready: ready (true) not ready(false)
* @get_vq_ready: Get ready status for a virtqueue
* @vdev: vdpa device
* @idx: virtqueue index
* Returns boolean: ready (true) or not (false)
* @set_vq_state: Set the state for a virtqueue
* @vdev: vdpa device
* @idx: virtqueue index
* @state: virtqueue state (last_avail_idx)
* Returns integer: success (0) or error (< 0)
* @get_vq_state: Get the state for a virtqueue
* @vdev: vdpa device
* @idx: virtqueue index
* Returns virtqueue state (last_avail_idx)
* @get_vq_align: Get the virtqueue align requirement
* for the device
* @vdev: vdpa device
* Returns virtqueue algin requirement
* @get_features: Get virtio features supported by the device
* @vdev: vdpa device
* Returns the virtio features support by the
* device
* @set_features: Set virtio features supported by the driver
* @vdev: vdpa device
* @features: feature support by the driver
* Returns integer: success (0) or error (< 0)
* @set_config_cb: Set the config interrupt callback
* @vdev: vdpa device
* @cb: virtio-vdev interrupt callback structure
* @get_vq_num_max: Get the max size of virtqueue
* @vdev: vdpa device
* Returns u16: max size of virtqueue
* @get_device_id: Get virtio device id
* @vdev: vdpa device
* Returns u32: virtio device id
* @get_vendor_id: Get id for the vendor that provides this device
* @vdev: vdpa device
* Returns u32: virtio vendor id
* @get_status: Get the device status
* @vdev: vdpa device
* Returns u8: virtio device status
* @set_status: Set the device status
* @vdev: vdpa device
* @status: virtio device status
* @get_config: Read from device specific configuration space
* @vdev: vdpa device
* @offset: offset from the beginning of
* configuration space
* @buf: buffer used to read to
* @len: the length to read from
* configuration space
* @set_config: Write to device specific configuration space
* @vdev: vdpa device
* @offset: offset from the beginning of
* configuration space
* @buf: buffer used to write from
* @len: the length to write to
* configuration space
* @get_generation: Get device config generation (optional)
* @vdev: vdpa device
* Returns u32: device generation
* @set_map: Set device memory mapping (optional)
* Needed for device that using device
* specific DMA translation (on-chip IOMMU)
* @vdev: vdpa device
* @iotlb: vhost memory mapping to be
* used by the vDPA
* Returns integer: success (0) or error (< 0)
* @dma_map: Map an area of PA to IOVA (optional)
* Needed for device that using device
* specific DMA translation (on-chip IOMMU)
* and preferring incremental map.
* @vdev: vdpa device
* @iova: iova to be mapped
* @size: size of the area
* @pa: physical address for the map
* @perm: device access permission (VHOST_MAP_XX)
* Returns integer: success (0) or error (< 0)
* @dma_unmap: Unmap an area of IOVA (optional but
* must be implemented with dma_map)
* Needed for device that using device
* specific DMA translation (on-chip IOMMU)
* and preferring incremental unmap.
* @vdev: vdpa device
* @iova: iova to be unmapped
* @size: size of the area
* Returns integer: success (0) or error (< 0)
* @free: Free resources that belongs to vDPA (optional)
* @vdev: vdpa device
*/
struct vdpa_config_ops {
/* Virtqueue ops */
int (*set_vq_address)(struct vdpa_device *vdev,
u16 idx, u64 desc_area, u64 driver_area,
u64 device_area);
void (*set_vq_num)(struct vdpa_device *vdev, u16 idx, u32 num);
void (*kick_vq)(struct vdpa_device *vdev, u16 idx);
void (*set_vq_cb)(struct vdpa_device *vdev, u16 idx,
struct vdpa_callback *cb);
void (*set_vq_ready)(struct vdpa_device *vdev, u16 idx, bool ready);
bool (*get_vq_ready)(struct vdpa_device *vdev, u16 idx);
int (*set_vq_state)(struct vdpa_device *vdev, u16 idx, u64 state);
u64 (*get_vq_state)(struct vdpa_device *vdev, u16 idx);
/* Device ops */
u16 (*get_vq_align)(struct vdpa_device *vdev);
u64 (*get_features)(struct vdpa_device *vdev);
int (*set_features)(struct vdpa_device *vdev, u64 features);
void (*set_config_cb)(struct vdpa_device *vdev,
struct vdpa_callback *cb);
u16 (*get_vq_num_max)(struct vdpa_device *vdev);
u32 (*get_device_id)(struct vdpa_device *vdev);
u32 (*get_vendor_id)(struct vdpa_device *vdev);
u8 (*get_status)(struct vdpa_device *vdev);
void (*set_status)(struct vdpa_device *vdev, u8 status);
void (*get_config)(struct vdpa_device *vdev, unsigned int offset,
void *buf, unsigned int len);
void (*set_config)(struct vdpa_device *vdev, unsigned int offset,
const void *buf, unsigned int len);
u32 (*get_generation)(struct vdpa_device *vdev);
/* DMA ops */
int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb);
int (*dma_map)(struct vdpa_device *vdev, u64 iova, u64 size,
u64 pa, u32 perm);
int (*dma_unmap)(struct vdpa_device *vdev, u64 iova, u64 size);
/* Free device resources */
void (*free)(struct vdpa_device *vdev);
};
struct vdpa_device *__vdpa_alloc_device(struct device *parent,
const struct vdpa_config_ops *config,
size_t size);
#define vdpa_alloc_device(dev_struct, member, parent, config) \
container_of(__vdpa_alloc_device( \
parent, config, \
sizeof(dev_struct) + \
BUILD_BUG_ON_ZERO(offsetof( \
dev_struct, member))), \
dev_struct, member)
int vdpa_register_device(struct vdpa_device *vdev);
void vdpa_unregister_device(struct vdpa_device *vdev);
/**
* vdpa_driver - operations for a vDPA driver
* @driver: underlying device driver
* @probe: the function to call when a device is found. Returns 0 or -errno.
* @remove: the function to call when a device is removed.
*/
struct vdpa_driver {
struct device_driver driver;
int (*probe)(struct vdpa_device *vdev);
void (*remove)(struct vdpa_device *vdev);
};
#define vdpa_register_driver(drv) \
__vdpa_register_driver(drv, THIS_MODULE)
int __vdpa_register_driver(struct vdpa_driver *drv, struct module *owner);
void vdpa_unregister_driver(struct vdpa_driver *drv);
#define module_vdpa_driver(__vdpa_driver) \
module_driver(__vdpa_driver, vdpa_register_driver, \
vdpa_unregister_driver)
static inline struct vdpa_driver *drv_to_vdpa(struct device_driver *driver)
{
return container_of(driver, struct vdpa_driver, driver);
}
static inline struct vdpa_device *dev_to_vdpa(struct device *_dev)
{
return container_of(_dev, struct vdpa_device, dev);
}
static inline void *vdpa_get_drvdata(const struct vdpa_device *vdev)
{
return dev_get_drvdata(&vdev->dev);
}
static inline void vdpa_set_drvdata(struct vdpa_device *vdev, void *data)
{
dev_set_drvdata(&vdev->dev, data);
}
static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)
{
return vdev->dma_dev;
}
#endif /* _LINUX_VDPA_H */
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_VHOST_IOTLB_H
#define _LINUX_VHOST_IOTLB_H
#include <linux/interval_tree_generic.h>
struct vhost_iotlb_map {
struct rb_node rb;
struct list_head link;
u64 start;
u64 last;
u64 size;
u64 addr;
#define VHOST_MAP_RO 0x1
#define VHOST_MAP_WO 0x2
#define VHOST_MAP_RW 0x3
u32 perm;
u32 flags_padding;
u64 __subtree_last;
};
#define VHOST_IOTLB_FLAG_RETIRE 0x1
struct vhost_iotlb {
struct rb_root_cached root;
struct list_head list;
unsigned int limit;
unsigned int nmaps;
unsigned int flags;
};
int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, u64 start, u64 last,
u64 addr, unsigned int perm);
void vhost_iotlb_del_range(struct vhost_iotlb *iotlb, u64 start, u64 last);
struct vhost_iotlb *vhost_iotlb_alloc(unsigned int limit, unsigned int flags);
void vhost_iotlb_free(struct vhost_iotlb *iotlb);
void vhost_iotlb_reset(struct vhost_iotlb *iotlb);
struct vhost_iotlb_map *
vhost_iotlb_itree_first(struct vhost_iotlb *iotlb, u64 start, u64 last);
struct vhost_iotlb_map *
vhost_iotlb_itree_next(struct vhost_iotlb_map *map, u64 start, u64 last);
void vhost_iotlb_map_free(struct vhost_iotlb *iotlb,
struct vhost_iotlb_map *map);
#endif
......@@ -14,6 +14,8 @@
#include <linux/virtio_byteorder.h>
#include <linux/uio.h>
#include <linux/slab.h>
#include <linux/dma-direction.h>
#include <linux/vhost_iotlb.h>
#include <asm/barrier.h>
/* virtio_ring with information needed for host access. */
......@@ -39,6 +41,9 @@ struct vringh {
/* The vring (note: it may contain user pointers!) */
struct vring vring;
/* IOTLB for this vring */
struct vhost_iotlb *iotlb;
/* The function to call to notify the guest about added buffers */
void (*notify)(struct vringh *);
};
......@@ -248,4 +253,35 @@ static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val)
{
return __cpu_to_virtio64(vringh_is_little_endian(vrh), val);
}
void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb);
int vringh_init_iotlb(struct vringh *vrh, u64 features,
unsigned int num, bool weak_barriers,
struct vring_desc *desc,
struct vring_avail *avail,
struct vring_used *used);
int vringh_getdesc_iotlb(struct vringh *vrh,
struct vringh_kiov *riov,
struct vringh_kiov *wiov,
u16 *head,
gfp_t gfp);
ssize_t vringh_iov_pull_iotlb(struct vringh *vrh,
struct vringh_kiov *riov,
void *dst, size_t len);
ssize_t vringh_iov_push_iotlb(struct vringh *vrh,
struct vringh_kiov *wiov,
const void *src, size_t len);
void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num);
int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len);
bool vringh_notify_enable_iotlb(struct vringh *vrh);
void vringh_notify_disable_iotlb(struct vringh *vrh);
int vringh_need_notify_iotlb(struct vringh *vrh);
#endif /* _LINUX_VRINGH_H */
......@@ -116,4 +116,28 @@
#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64)
#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int)
/* VHOST_VDPA specific defines */
/* Get the device id. The device ids follow the same definition of
* the device id defined in virtio-spec.
*/
#define VHOST_VDPA_GET_DEVICE_ID _IOR(VHOST_VIRTIO, 0x70, __u32)
/* Get and set the status. The status bits follow the same definition
* of the device status defined in virtio-spec.
*/
#define VHOST_VDPA_GET_STATUS _IOR(VHOST_VIRTIO, 0x71, __u8)
#define VHOST_VDPA_SET_STATUS _IOW(VHOST_VIRTIO, 0x72, __u8)
/* Get and set the device config. The device config follows the same
* definition of the device config defined in virtio-spec.
*/
#define VHOST_VDPA_GET_CONFIG _IOR(VHOST_VIRTIO, 0x73, \
struct vhost_vdpa_config)
#define VHOST_VDPA_SET_CONFIG _IOW(VHOST_VIRTIO, 0x74, \
struct vhost_vdpa_config)
/* Enable/disable the ring. */
#define VHOST_VDPA_SET_VRING_ENABLE _IOW(VHOST_VIRTIO, 0x75, \
struct vhost_vring_state)
/* Get the max ring size. */
#define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16)
#endif
......@@ -119,6 +119,14 @@ struct vhost_scsi_target {
unsigned short reserved;
};
/* VHOST_VDPA specific definitions */
struct vhost_vdpa_config {
__u32 off;
__u32 len;
__u8 buf[0];
};
/* Feature bits */
/* Log all write descriptors. Can be changed while device is active. */
#define VHOST_F_LOG_ALL 26
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment