Commit 2fb732b3 authored by Linus Torvalds

Merge tag 'vfio-v5.7-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - vfio-pci SR-IOV support (Alex Williamson)

 - vfio DMA read/write interface (Yan Zhao)

 - Fix vfio-platform erroneous IRQ error log (Eric Auger)

 - Fix shared ATSD support for NVLink on POWER (Sam Bobroff)

 - Fix init error without CONFIG_IOMMU_DMA (Andre Przywara)

* tag 'vfio-v5.7-rc1' of git://github.com/awilliam/linux-vfio:
  vfio: Ignore -ENODEV when getting MSI cookie
  vfio-pci/nvlink2: Allow fallback to ibm,mmio-atsd[0]
  vfio/pci: Cleanup .probe() exit paths
  vfio/pci: Remove dev_fmt definition
  vfio/pci: Add sriov_configure support
  vfio: Introduce VFIO_DEVICE_FEATURE ioctl and first user
  vfio/pci: Introduce VF token
  vfio/pci: Implement match ops
  vfio: Include optional device match in vfio_device_ops callbacks
  vfio: avoid inefficient operations on VFIO group in vfio_pin/unpin_pages
  vfio: introduce vfio_dma_rw to read/write a range of IOVAs
  vfio: allow external user to get vfio group from device
  vfio: platform: Switch to platform_get_irq_optional()
parents ad0bf4eb f44efca0
This diff is collapsed.
......@@ -422,8 +422,14 @@ int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", nvlink_index,
&mmio_atsd)) {
dev_warn(&vdev->pdev->dev, "No available ATSD found\n");
mmio_atsd = 0;
if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", 0,
&mmio_atsd)) {
dev_warn(&vdev->pdev->dev, "No available ATSD found\n");
mmio_atsd = 0;
} else {
dev_warn(&vdev->pdev->dev,
"Using fallback ibm,mmio-atsd[0] for ATSD.\n");
}
}
if (of_property_read_u64(npu_node, "ibm,device-tgt-addr", &tgt)) {
......
......@@ -12,6 +12,8 @@
#include <linux/pci.h>
#include <linux/irqbypass.h>
#include <linux/types.h>
#include <linux/uuid.h>
#include <linux/notifier.h>
#ifndef VFIO_PCI_PRIVATE_H
#define VFIO_PCI_PRIVATE_H
......@@ -84,6 +86,12 @@ struct vfio_pci_reflck {
struct mutex lock;
};
/*
 * State for a vfio-pci VF token.
 *
 * NOTE(review): the code that uses this structure is not visible here, so
 * the roles of 'lock' and 'users' below are inferred from their names only
 * -- confirm against the users before relying on them.
 */
struct vfio_pci_vf_token {
struct mutex lock; /* presumably serializes access to uuid and users */
uuid_t uuid; /* the token value, stored as a UUID */
int users; /* presumably a count of current token users -- confirm */
};
struct vfio_pci_device {
struct pci_dev *pdev;
void __iomem *barmap[PCI_STD_NUM_BARS];
......@@ -122,6 +130,8 @@ struct vfio_pci_device {
struct list_head dummy_resources_list;
struct mutex ioeventfds_lock;
struct list_head ioeventfds_list;
struct vfio_pci_vf_token *vf_token;
struct notifier_block nb;
};
#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
......
......@@ -44,7 +44,7 @@ static int get_platform_irq(struct vfio_platform_device *vdev, int i)
{
struct platform_device *pdev = (struct platform_device *) vdev->opaque;
return platform_get_irq(pdev, i);
return platform_get_irq_optional(pdev, i);
}
static int vfio_platform_probe(struct platform_device *pdev)
......
......@@ -875,11 +875,23 @@ EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);
static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
char *buf)
{
struct vfio_device *it, *device = NULL;
struct vfio_device *it, *device = ERR_PTR(-ENODEV);
mutex_lock(&group->device_lock);
list_for_each_entry(it, &group->device_list, group_next) {
if (!strcmp(dev_name(it->dev), buf)) {
int ret;
if (it->ops->match) {
ret = it->ops->match(it->device_data, buf);
if (ret < 0) {
device = ERR_PTR(ret);
break;
}
} else {
ret = !strcmp(dev_name(it->dev), buf);
}
if (ret) {
device = it;
vfio_device_get(device);
break;
......@@ -1430,8 +1442,8 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
return -EPERM;
device = vfio_device_get_from_name(group, buf);
if (!device)
return -ENODEV;
if (IS_ERR(device))
return PTR_ERR(device);
ret = device->ops->open(device->device_data);
if (ret) {
......@@ -1720,6 +1732,44 @@ struct vfio_group *vfio_group_get_external_user(struct file *filep)
}
EXPORT_SYMBOL_GPL(vfio_group_get_external_user);
/**
 * External user API, exported by symbols to be linked dynamically.
 *
 * Given a device pointer, verify that:
 *	- a VFIO group is associated with the device;
 *	- an IOMMU is set for the group.
 *
 * When both checks pass, the container user counter is incremented so that
 * the VFIO group cannot be disposed of while the external user still holds
 * it, and the group pointer is handed back to the caller.
 *
 * Once the external user is done with the VFIO group it must call
 * vfio_group_put_external_user() to release the group and decrement the
 * container user counter again.
 *
 * @dev [in]	: device
 * Return error PTR or pointer to VFIO group.
 */
struct vfio_group *vfio_group_get_external_user_from_dev(struct device *dev)
{
	struct vfio_group *group = vfio_group_get_from_dev(dev);
	int err;

	/* No VFIO group is known for this device. */
	if (!group)
		return ERR_PTR(-ENODEV);

	err = vfio_group_add_container_user(group);
	if (!err)
		return group;

	/* Could not register as a container user: release the group again. */
	vfio_group_put(group);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(vfio_group_get_external_user_from_dev);
void vfio_group_put_external_user(struct vfio_group *group)
{
vfio_group_try_dissolve_container(group);
......@@ -1961,6 +2011,146 @@ int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
}
EXPORT_SYMBOL(vfio_unpin_pages);
/*
 * Pin a set of guest IOVA PFNs and return their associated host PFNs for a
 * VFIO group.
 *
 * The caller needs to call vfio_group_get_external_user() or
 * vfio_group_get_external_user_from_dev() prior to calling this interface,
 * so as to prevent the VFIO group from disposal in the middle of the call.
 * The reference may be kept across several calls into this interface.
 * When the caller has finished using the VFIO group, it must release it by
 * calling vfio_group_put_external_user().
 *
 * @group [in]		: VFIO group
 * @user_iova_pfn [in]	: array of user/guest IOVA PFNs to be pinned.
 * @npage [in]		: count of elements in user_iova_pfn array.
 *			  This count must be positive and should not be
 *			  greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 * @prot [in]		: protection flags
 * @phys_pfn [out]	: array of host PFNs
 *
 * Return error or number of pages pinned:
 *	-EINVAL	a pointer argument is NULL or @npage is not positive;
 *	-E2BIG	@npage exceeds VFIO_PIN_PAGES_MAX_ENTRIES;
 *	-ENOTTY	the container has no IOMMU driver, or the driver does not
 *		implement pin_pages.
 */
int vfio_group_pin_pages(struct vfio_group *group,
			 unsigned long *user_iova_pfn, int npage,
			 int prot, unsigned long *phys_pfn)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;

	/* npage is signed, so reject negative counts as well as zero. */
	if (!group || !user_iova_pfn || !phys_pfn || npage <= 0)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->pin_pages))
		return driver->ops->pin_pages(container->iommu_data,
					      user_iova_pfn, npage,
					      prot, phys_pfn);

	return -ENOTTY;
}
EXPORT_SYMBOL(vfio_group_pin_pages);
/*
 * Unpin a set of guest IOVA PFNs for a VFIO group.
 *
 * The caller needs to call vfio_group_get_external_user() or
 * vfio_group_get_external_user_from_dev() prior to calling this interface,
 * so as to prevent the VFIO group from disposal in the middle of the call.
 * The reference may be kept across several calls into this interface.
 * When the caller has finished using the VFIO group, it must release it by
 * calling vfio_group_put_external_user().
 *
 * @group [in]		: vfio group
 * @user_iova_pfn [in]	: array of user/guest IOVA PFNs to be unpinned.
 * @npage [in]		: count of elements in user_iova_pfn array.
 *			  This count must be positive and should not be
 *			  greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 *
 * Return error or number of pages unpinned:
 *	-EINVAL	a pointer argument is NULL or @npage is not positive;
 *	-E2BIG	@npage exceeds VFIO_PIN_PAGES_MAX_ENTRIES;
 *	-ENOTTY	the container has no IOMMU driver, or the driver does not
 *		implement unpin_pages.
 */
int vfio_group_unpin_pages(struct vfio_group *group,
			   unsigned long *user_iova_pfn, int npage)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;

	/* npage is signed, so reject negative counts as well as zero. */
	if (!group || !user_iova_pfn || npage <= 0)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->unpin_pages))
		return driver->ops->unpin_pages(container->iommu_data,
						user_iova_pfn, npage);

	return -ENOTTY;
}
EXPORT_SYMBOL(vfio_group_unpin_pages);
/*
 * Let the CPUs carry out a "virtual DMA" on behalf of the device.
 *
 * Data is moved by the CPUs between a kernel buffer and a range of IOVAs
 * that point to user space memory; @write selects between read and write.
 *
 * Because the access is performed by the CPUs and is not a real device DMA,
 * the user space memory does not need to be pinned.
 *
 * The caller needs to call vfio_group_get_external_user() or
 * vfio_group_get_external_user_from_dev() prior to calling this interface,
 * so as to prevent the VFIO group from disposal in the middle of the call.
 * The reference may be kept across several calls into this interface.
 * When the caller has finished using the VFIO group, it must release it by
 * calling vfio_group_put_external_user().
 *
 * @group [in]		: VFIO group
 * @user_iova [in]	: base IOVA of a user space buffer
 * @data [in]		: pointer to kernel buffer
 * @len [in]		: kernel buffer length
 * @write		: indicate read or write
 * Return error code on failure or 0 on success.
 */
int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova,
		void *data, size_t len, bool write)
{
	struct vfio_iommu_driver *driver;
	struct vfio_container *container;

	/* len is unsigned, so the only invalid length is zero. */
	if (!group || !data || !len)
		return -EINVAL;

	container = group->container;
	driver = container->iommu_driver;

	/* Hand the request to the IOMMU backend when it implements dma_rw. */
	if (likely(driver && driver->ops->dma_rw))
		return driver->ops->dma_rw(container->iommu_data,
					   user_iova, data, len, write);

	return -ENOTTY;
}
EXPORT_SYMBOL(vfio_dma_rw);
static int vfio_register_iommu_notifier(struct vfio_group *group,
unsigned long *events,
struct notifier_block *nb)
......
......@@ -27,6 +27,7 @@
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/rbtree.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
......@@ -1786,7 +1787,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
if (resv_msi) {
ret = iommu_get_msi_cookie(domain->domain, resv_msi_base);
if (ret)
if (ret && ret != -ENODEV)
goto out_detach;
}
......@@ -2305,6 +2306,80 @@ static int vfio_iommu_type1_unregister_notifier(void *iommu_data,
return blocking_notifier_chain_unregister(&iommu->notifier, nb);
}
/*
 * Copy one chunk of data between the kernel buffer @data and the user
 * memory backing @user_iova, staying within a single vfio_dma mapping.
 *
 * @count is clamped to the bytes remaining in the mapping found for
 * @user_iova. The number of bytes transferred is stored in @copied, which
 * is 0 on every failure path. @write selects the direction: true copies
 * @data to user memory, false copies user memory into @data.
 *
 * Returns 0 when *copied is non-zero, -EINVAL when no mapping is found for
 * @user_iova, -EPERM when the mapping's protection bits do not allow the
 * access or the mapping's task has no mm, and -EFAULT otherwise.
 */
static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
dma_addr_t user_iova, void *data,
size_t count, bool write,
size_t *copied)
{
struct mm_struct *mm;
unsigned long vaddr;
struct vfio_dma *dma;
/* A caller without an mm of its own is treated as a kernel thread. */
bool kthread = current->mm == NULL;
size_t offset;
*copied = 0;
/* Look up the mapping with a size of 1, i.e. for the first byte only. */
dma = vfio_find_dma(iommu, user_iova, 1);
if (!dma)
return -EINVAL;
/*
 * IOMMU_READ is required in both directions; writes additionally need
 * IOMMU_WRITE.
 * NOTE(review): this also rejects writes to a write-only mapping --
 * confirm that is intended.
 */
if ((write && !(dma->prot & IOMMU_WRITE)) ||
!(dma->prot & IOMMU_READ))
return -EPERM;
/* Reference on the mm of the mapping's task; dropped by mmput() at out. */
mm = get_task_mm(dma->task);
if (!mm)
return -EPERM;
/*
 * A kernel thread adopts the mapping owner's mm for the copy. A caller
 * running with a different mm skips the copy, leaving *copied at 0 so
 * that -EFAULT is returned.
 */
if (kthread)
use_mm(mm);
else if (current->mm != mm)
goto out;
/* Do not run past the end of this mapping. */
offset = user_iova - dma->iova;
if (count > dma->size - offset)
count = dma->size - offset;
vaddr = dma->vaddr + offset;
/*
 * A non-zero return from __copy_to_user()/__copy_from_user() is treated
 * as failure of the whole chunk: *copied is either count or 0.
 */
if (write)
*copied = __copy_to_user((void __user *)vaddr, data,
count) ? 0 : count;
else
*copied = __copy_from_user(data, (void __user *)vaddr,
count) ? 0 : count;
if (kthread)
unuse_mm(mm);
out:
mmput(mm);
return *copied ? 0 : -EFAULT;
}
/*
 * Read or write @count bytes between the kernel buffer @data and the user
 * memory behind @user_iova, one chunk at a time.
 *
 * Each pass delegates to vfio_iommu_type1_dma_rw_chunk() and then advances
 * the IOVA, the buffer pointer and the remaining count by the number of
 * bytes that chunk reported. The first chunk error stops the transfer and
 * is returned as is. iommu->lock is held for the whole transfer.
 *
 * Returns 0 once everything has been transferred (or @count is 0),
 * otherwise the error code of the failing chunk.
 */
static int vfio_iommu_type1_dma_rw(void *iommu_data, dma_addr_t user_iova,
				   void *data, size_t count, bool write)
{
	struct vfio_iommu *iommu = iommu_data;
	size_t chunk;
	int err = 0;

	mutex_lock(&iommu->lock);

	/* The advance step is skipped when a failing chunk breaks the loop. */
	for (; count > 0; count -= chunk, data += chunk, user_iova += chunk) {
		err = vfio_iommu_type1_dma_rw_chunk(iommu, user_iova, data,
						    count, write, &chunk);
		if (err)
			break;
	}

	mutex_unlock(&iommu->lock);

	return err;
}
static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
.name = "vfio-iommu-type1",
.owner = THIS_MODULE,
......@@ -2317,6 +2392,7 @@ static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
.unpin_pages = vfio_iommu_type1_unpin_pages,
.register_notifier = vfio_iommu_type1_register_notifier,
.unregister_notifier = vfio_iommu_type1_unregister_notifier,
.dma_rw = vfio_iommu_type1_dma_rw,
};
static int __init vfio_iommu_type1_init(void)
......
......@@ -26,6 +26,9 @@
* operations documented below
* @mmap: Perform mmap(2) on a region of the device file descriptor
* @request: Request for the bus driver to release the device
* @match: Optional device name match callback (return: 0 for no-match, >0 for
* match, -errno for abort (ex. match with insufficient or incorrect
* additional args)
*/
struct vfio_device_ops {
char *name;
......@@ -39,6 +42,7 @@ struct vfio_device_ops {
unsigned long arg);
int (*mmap)(void *device_data, struct vm_area_struct *vma);
void (*request)(void *device_data, unsigned int count);
int (*match)(void *device_data, char *buf);
};
extern struct iommu_group *vfio_iommu_group_get(struct device *dev);
......@@ -82,6 +86,8 @@ struct vfio_iommu_driver_ops {
struct notifier_block *nb);
int (*unregister_notifier)(void *iommu_data,
struct notifier_block *nb);
int (*dma_rw)(void *iommu_data, dma_addr_t user_iova,
void *data, size_t count, bool write);
};
extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
......@@ -94,6 +100,8 @@ extern void vfio_unregister_iommu_driver(
*/
extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
extern void vfio_group_put_external_user(struct vfio_group *group);
extern struct vfio_group *vfio_group_get_external_user_from_dev(struct device
*dev);
extern bool vfio_external_group_match_file(struct vfio_group *group,
struct file *filep);
extern int vfio_external_user_iommu_id(struct vfio_group *group);
......@@ -107,6 +115,15 @@ extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
int npage);
extern int vfio_group_pin_pages(struct vfio_group *group,
unsigned long *user_iova_pfn, int npage,
int prot, unsigned long *phys_pfn);
extern int vfio_group_unpin_pages(struct vfio_group *group,
unsigned long *user_iova_pfn, int npage);
extern int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova,
void *data, size_t len, bool write);
/* each type has independent events */
enum vfio_notify_type {
VFIO_IOMMU_NOTIFY = 0,
......
......@@ -707,6 +707,43 @@ struct vfio_device_ioeventfd {
#define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16)
/**
* VFIO_DEVICE_FEATURE - _IORW(VFIO_TYPE, VFIO_BASE + 17,
* struct vfio_device_feature)
*
* Get, set, or probe feature data of the device. The feature is selected
* using the FEATURE_MASK portion of the flags field. Support for a feature
* can be probed by setting both the FEATURE_MASK and PROBE bits. A probe
* may optionally include the GET and/or SET bits to determine read vs write
* access of the feature respectively. Probing a feature will return success
* if the feature is supported and all of the optionally indicated GET/SET
* methods are supported. The format of the data portion of the structure is
* specific to the given feature. The data portion is not required for
* probing. GET and SET are mutually exclusive, except for use with PROBE.
*
* Return 0 on success, -errno on failure.
*/
/* Argument structure for the VFIO_DEVICE_FEATURE ioctl. */
struct vfio_device_feature {
/* NOTE(review): presumably the size in bytes of this structure including data[] -- confirm */
__u32 argsz;
/* Feature index in the FEATURE_MASK bits, combined with the GET/SET/PROBE bits below */
__u32 flags;
#define VFIO_DEVICE_FEATURE_MASK (0xffff) /* 16-bit feature index */
#define VFIO_DEVICE_FEATURE_GET (1 << 16) /* Get feature into data[] */
#define VFIO_DEVICE_FEATURE_SET (1 << 17) /* Set feature from data[] */
#define VFIO_DEVICE_FEATURE_PROBE (1 << 18) /* Probe feature support */
/* Feature-specific payload; its format depends on the selected feature */
__u8 data[];
};
#define VFIO_DEVICE_FEATURE _IO(VFIO_TYPE, VFIO_BASE + 17)
/*
* Provide support for setting a PCI VF Token, which is used as a shared
* secret between PF and VF drivers. This feature may only be set on a
* PCI SR-IOV PF when SR-IOV is enabled on the PF and there are no existing
* open VFs. Data provided when setting this feature is a 16-byte array
* (__u8 b[16]), representing a UUID.
*/
#define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0)
/* -------- API for Type1 VFIO IOMMU -------- */
/**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment