Commit 68132b35 authored by Linus Torvalds

Merge tag 'vfio-v6.10-rc4' of https://github.com/awilliam/linux-vfio

Pull VFIO fixes from Alex Williamson:
 "Fix long standing lockdep issue of using remap_pfn_range() from the
  vfio-pci fault handler for mapping device MMIO. Commit ba168b52
  ("mm: use rwsem assertion macros for mmap_lock") now exposes this as a
  warning forcing this to be addressed.

  remap_pfn_range() was used here to efficiently map the entire vma, but
  it never should have been called from a fault handler and it doesn't
  handle concurrency, which led to complex locking. We also needed to
  track the vmas mapping the device memory so that we could zap them
  when the memory is disabled, which resulted in a vma list.

  Instead of all that mess, set up an address space on the device fd
  so that we can use unmap_mapping_range() for zapping, avoiding the
  tracking overhead, and use the standard vmf_insert_pfn() to insert
  mappings on fault.

  For now we'll iterate the vma and opportunistically try to insert
  mappings for the entire vma. This aligns with typical use cases, but
  hopefully in the future we can drop the iterative approach and make
  use of huge_fault instead, once vmf_insert_pfn{pud,pmd}() learn to
  handle pfnmaps"

* tag 'vfio-v6.10-rc4' of https://github.com/awilliam/linux-vfio:
  vfio/pci: Insert full vma on mmap'd MMIO fault
  vfio/pci: Use unmap_mapping_range()
  vfio: Create vfio_fs_type with inode per device
parents c286c21f d71a989c
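
The zap side, likewise boiled down from the patches below: each vfio_device now owns an anonymous inode on a private "vfio" pseudo filesystem, every file opened against the device points its f_mapping at that inode, and revoking MMIO access becomes a single range unmap on that shared address space instead of a walk over a tracked vma list. In sketch form (names as in the diff that follows):

  /* at open time (device_cdev.c / group.c): all fds share the device inode's mapping */
          filep->f_mapping = device->inode->i_mapping;

  /* on disable/reset (vfio_pci_core.c): one call drops every user mapping of the BARs */
  static void vfio_pci_zap_bars(struct vfio_pci_core_device *vdev)
  {
          loff_t start = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR0_REGION_INDEX);
          loff_t end = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_ROM_REGION_INDEX);

          unmap_mapping_range(vdev->vdev.inode->i_mapping, start, end - start, true);
  }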
drivers/vfio/device_cdev.c
@@ -39,6 +39,13 @@ int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep)
 
         filep->private_data = df;
 
+        /*
+         * Use the pseudo fs inode on the device to link all mmaps
+         * to the same address space, allowing us to unmap all vmas
+         * associated to this device using unmap_mapping_range().
+         */
+        filep->f_mapping = device->inode->i_mapping;
+
         return 0;
 
 err_put_registration:
...
drivers/vfio/group.c
@@ -286,6 +286,13 @@ static struct file *vfio_device_open_file(struct vfio_device *device)
          */
         filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);
 
+        /*
+         * Use the pseudo fs inode on the device to link all mmaps
+         * to the same address space, allowing us to unmap all vmas
+         * associated to this device using unmap_mapping_range().
+         */
+        filep->f_mapping = device->inode->i_mapping;
+
         if (device->group->type == VFIO_NO_IOMMU)
                 dev_warn(device->dev, "vfio-noiommu device opened by user "
                          "(%s:%d)\n", current->comm, task_pid_nr(current));
...
drivers/vfio/pci/vfio_pci_core.c
@@ -1610,100 +1610,20 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf,
 }
 EXPORT_SYMBOL_GPL(vfio_pci_core_write);
 
-/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */
-static int vfio_pci_zap_and_vma_lock(struct vfio_pci_core_device *vdev, bool try)
+static void vfio_pci_zap_bars(struct vfio_pci_core_device *vdev)
 {
-        struct vfio_pci_mmap_vma *mmap_vma, *tmp;
+        struct vfio_device *core_vdev = &vdev->vdev;
+        loff_t start = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR0_REGION_INDEX);
+        loff_t end = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_ROM_REGION_INDEX);
+        loff_t len = end - start;
 
-        /*
-         * Lock ordering:
-         * vma_lock is nested under mmap_lock for vm_ops callback paths.
-         * The memory_lock semaphore is used by both code paths calling
-         * into this function to zap vmas and the vm_ops.fault callback
-         * to protect the memory enable state of the device.
-         *
-         * When zapping vmas we need to maintain the mmap_lock => vma_lock
-         * ordering, which requires using vma_lock to walk vma_list to
-         * acquire an mm, then dropping vma_lock to get the mmap_lock and
-         * reacquiring vma_lock.  This logic is derived from similar
-         * requirements in uverbs_user_mmap_disassociate().
-         *
-         * mmap_lock must always be the top-level lock when it is taken.
-         * Therefore we can only hold the memory_lock write lock when
-         * vma_list is empty, as we'd need to take mmap_lock to clear
-         * entries.  vma_list can only be guaranteed empty when holding
-         * vma_lock, thus memory_lock is nested under vma_lock.
-         *
-         * This enables the vm_ops.fault callback to acquire vma_lock,
-         * followed by memory_lock read lock, while already holding
-         * mmap_lock without risk of deadlock.
-         */
-        while (1) {
-                struct mm_struct *mm = NULL;
-
-                if (try) {
-                        if (!mutex_trylock(&vdev->vma_lock))
-                                return 0;
-                } else {
-                        mutex_lock(&vdev->vma_lock);
-                }
-                while (!list_empty(&vdev->vma_list)) {
-                        mmap_vma = list_first_entry(&vdev->vma_list,
-                                                    struct vfio_pci_mmap_vma,
-                                                    vma_next);
-                        mm = mmap_vma->vma->vm_mm;
-                        if (mmget_not_zero(mm))
-                                break;
-
-                        list_del(&mmap_vma->vma_next);
-                        kfree(mmap_vma);
-                        mm = NULL;
-                }
-                if (!mm)
-                        return 1;
-                mutex_unlock(&vdev->vma_lock);
-
-                if (try) {
-                        if (!mmap_read_trylock(mm)) {
-                                mmput(mm);
-                                return 0;
-                        }
-                } else {
-                        mmap_read_lock(mm);
-                }
-                if (try) {
-                        if (!mutex_trylock(&vdev->vma_lock)) {
-                                mmap_read_unlock(mm);
-                                mmput(mm);
-                                return 0;
-                        }
-                } else {
-                        mutex_lock(&vdev->vma_lock);
-                }
-                list_for_each_entry_safe(mmap_vma, tmp,
-                                         &vdev->vma_list, vma_next) {
-                        struct vm_area_struct *vma = mmap_vma->vma;
-
-                        if (vma->vm_mm != mm)
-                                continue;
-
-                        list_del(&mmap_vma->vma_next);
-                        kfree(mmap_vma);
-
-                        zap_vma_ptes(vma, vma->vm_start,
-                                     vma->vm_end - vma->vm_start);
-                }
-                mutex_unlock(&vdev->vma_lock);
-                mmap_read_unlock(mm);
-                mmput(mm);
-        }
+        unmap_mapping_range(core_vdev->inode->i_mapping, start, len, true);
 }
 
 void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev)
 {
-        vfio_pci_zap_and_vma_lock(vdev, false);
         down_write(&vdev->memory_lock);
-        mutex_unlock(&vdev->vma_lock);
+        vfio_pci_zap_bars(vdev);
 }
 
 u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev)
@@ -1725,99 +1645,56 @@ void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, u16 cmd)
         up_write(&vdev->memory_lock);
 }
 
-/* Caller holds vma_lock */
-static int __vfio_pci_add_vma(struct vfio_pci_core_device *vdev,
-                              struct vm_area_struct *vma)
-{
-        struct vfio_pci_mmap_vma *mmap_vma;
-
-        mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL_ACCOUNT);
-        if (!mmap_vma)
-                return -ENOMEM;
-
-        mmap_vma->vma = vma;
-        list_add(&mmap_vma->vma_next, &vdev->vma_list);
-
-        return 0;
-}
-
-/*
- * Zap mmaps on open so that we can fault them in on access and therefore
- * our vma_list only tracks mappings accessed since last zap.
- */
-static void vfio_pci_mmap_open(struct vm_area_struct *vma)
-{
-        zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
-}
-
-static void vfio_pci_mmap_close(struct vm_area_struct *vma)
+static unsigned long vma_to_pfn(struct vm_area_struct *vma)
 {
         struct vfio_pci_core_device *vdev = vma->vm_private_data;
-        struct vfio_pci_mmap_vma *mmap_vma;
-
-        mutex_lock(&vdev->vma_lock);
-        list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
-                if (mmap_vma->vma == vma) {
-                        list_del(&mmap_vma->vma_next);
-                        kfree(mmap_vma);
-                        break;
-                }
-        }
-        mutex_unlock(&vdev->vma_lock);
+        int index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+        u64 pgoff;
+
+        pgoff = vma->vm_pgoff &
+                ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+
+        return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff;
 }
 
 static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
 {
         struct vm_area_struct *vma = vmf->vma;
         struct vfio_pci_core_device *vdev = vma->vm_private_data;
-        struct vfio_pci_mmap_vma *mmap_vma;
-        vm_fault_t ret = VM_FAULT_NOPAGE;
+        unsigned long pfn, pgoff = vmf->pgoff - vma->vm_pgoff;
+        unsigned long addr = vma->vm_start;
+        vm_fault_t ret = VM_FAULT_SIGBUS;
+
+        pfn = vma_to_pfn(vma);
 
-        mutex_lock(&vdev->vma_lock);
         down_read(&vdev->memory_lock);
 
-        /*
-         * Memory region cannot be accessed if the low power feature is engaged
-         * or memory access is disabled.
-         */
-        if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) {
-                ret = VM_FAULT_SIGBUS;
-                goto up_out;
-        }
+        if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev))
+                goto out_unlock;
+
+        ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff);
+        if (ret & VM_FAULT_ERROR)
+                goto out_unlock;
 
         /*
-         * We populate the whole vma on fault, so we need to test whether
-         * the vma has already been mapped, such as for concurrent faults
-         * to the same vma.  io_remap_pfn_range() will trigger a BUG_ON if
-         * we ask it to fill the same range again.
+         * Pre-fault the remainder of the vma, abort further insertions and
+         * supress error if fault is encountered during pre-fault.
          */
-        list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
-                if (mmap_vma->vma == vma)
-                        goto up_out;
-        }
-
-        if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
-                               vma->vm_end - vma->vm_start,
-                               vma->vm_page_prot)) {
-                ret = VM_FAULT_SIGBUS;
-                zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
-                goto up_out;
-        }
+        for (; addr < vma->vm_end; addr += PAGE_SIZE, pfn++) {
+                if (addr == vmf->address)
+                        continue;
 
-        if (__vfio_pci_add_vma(vdev, vma)) {
-                ret = VM_FAULT_OOM;
-                zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+                if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR)
+                        break;
         }
 
-up_out:
+out_unlock:
         up_read(&vdev->memory_lock);
-        mutex_unlock(&vdev->vma_lock);
+
         return ret;
 }
 
 static const struct vm_operations_struct vfio_pci_mmap_ops = {
-        .open = vfio_pci_mmap_open,
-        .close = vfio_pci_mmap_close,
         .fault = vfio_pci_mmap_fault,
 };
@@ -1880,11 +1757,12 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
         vma->vm_private_data = vdev;
         vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-        vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;
+        vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
 
         /*
-         * See remap_pfn_range(), called from vfio_pci_fault() but we can't
-         * change vm_flags within the fault handler.  Set them now.
+         * Set vm_flags now, they should not be changed in the fault handler.
+         * We want the same flags and page protection (decrypted above) as
+         * io_remap_pfn_range() would set.
          *
          * VM_ALLOW_ANY_UNCACHED: The VMA flag is implemented for ARM64,
         * allowing KVM stage 2 device mapping attributes to use Normal-NC
@@ -2202,8 +2080,6 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev)
         mutex_init(&vdev->ioeventfds_lock);
         INIT_LIST_HEAD(&vdev->dummy_resources_list);
         INIT_LIST_HEAD(&vdev->ioeventfds_list);
-        mutex_init(&vdev->vma_lock);
-        INIT_LIST_HEAD(&vdev->vma_list);
         INIT_LIST_HEAD(&vdev->sriov_pfs_item);
         init_rwsem(&vdev->memory_lock);
         xa_init(&vdev->ctx);
@@ -2219,7 +2095,6 @@ void vfio_pci_core_release_dev(struct vfio_device *core_vdev)
         mutex_destroy(&vdev->igate);
         mutex_destroy(&vdev->ioeventfds_lock);
-        mutex_destroy(&vdev->vma_lock);
         kfree(vdev->region);
         kfree(vdev->pm_save);
 }
@@ -2497,26 +2372,15 @@ static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set)
         return ret;
 }
 
-/*
- * We need to get memory_lock for each device, but devices can share mmap_lock,
- * therefore we need to zap and hold the vma_lock for each device, and only then
- * get each memory_lock.
- */
 static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
                                       struct vfio_pci_group_info *groups,
                                       struct iommufd_ctx *iommufd_ctx)
 {
-        struct vfio_pci_core_device *cur_mem;
-        struct vfio_pci_core_device *cur_vma;
-        struct vfio_pci_core_device *cur;
+        struct vfio_pci_core_device *vdev;
         struct pci_dev *pdev;
-        bool is_mem = true;
         int ret;
 
         mutex_lock(&dev_set->lock);
-        cur_mem = list_first_entry(&dev_set->device_list,
-                                   struct vfio_pci_core_device,
-                                   vdev.dev_set_list);
 
         pdev = vfio_pci_dev_set_resettable(dev_set);
         if (!pdev) {
@@ -2533,7 +2397,7 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
         if (ret)
                 goto err_unlock;
 
-        list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) {
+        list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list) {
                 bool owned;
 
                 /*
...@@ -2557,38 +2421,38 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, ...@@ -2557,38 +2421,38 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
* Otherwise, reset is not allowed. * Otherwise, reset is not allowed.
*/ */
if (iommufd_ctx) { if (iommufd_ctx) {
int devid = vfio_iommufd_get_dev_id(&cur_vma->vdev, int devid = vfio_iommufd_get_dev_id(&vdev->vdev,
iommufd_ctx); iommufd_ctx);
owned = (devid > 0 || devid == -ENOENT); owned = (devid > 0 || devid == -ENOENT);
} else { } else {
owned = vfio_dev_in_groups(&cur_vma->vdev, groups); owned = vfio_dev_in_groups(&vdev->vdev, groups);
} }
if (!owned) { if (!owned) {
ret = -EINVAL; ret = -EINVAL;
goto err_undo; break;
} }
/* /*
* Locking multiple devices is prone to deadlock, runaway and * Take the memory write lock for each device and zap BAR
* unwind if we hit contention. * mappings to prevent the user accessing the device while in
* reset. Locking multiple devices is prone to deadlock,
* runaway and unwind if we hit contention.
*/ */
if (!vfio_pci_zap_and_vma_lock(cur_vma, true)) { if (!down_write_trylock(&vdev->memory_lock)) {
ret = -EBUSY; ret = -EBUSY;
goto err_undo; break;
} }
vfio_pci_zap_bars(vdev);
} }
cur_vma = NULL;
list_for_each_entry(cur_mem, &dev_set->device_list, vdev.dev_set_list) { if (!list_entry_is_head(vdev,
if (!down_write_trylock(&cur_mem->memory_lock)) { &dev_set->device_list, vdev.dev_set_list)) {
ret = -EBUSY; vdev = list_prev_entry(vdev, vdev.dev_set_list);
goto err_undo; goto err_undo;
} }
mutex_unlock(&cur_mem->vma_lock);
}
cur_mem = NULL;
/* /*
* The pci_reset_bus() will reset all the devices in the bus. * The pci_reset_bus() will reset all the devices in the bus.
@@ -2599,25 +2463,22 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
          * cause the PCI config space reset without restoring the original
          * state (saved locally in 'vdev->pm_save').
          */
-        list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list)
-                vfio_pci_set_power_state(cur, PCI_D0);
+        list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list)
+                vfio_pci_set_power_state(vdev, PCI_D0);
 
         ret = pci_reset_bus(pdev);
 
+        vdev = list_last_entry(&dev_set->device_list,
+                               struct vfio_pci_core_device, vdev.dev_set_list);
+
 err_undo:
-        list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
-                if (cur == cur_mem)
-                        is_mem = false;
-                if (cur == cur_vma)
-                        break;
-
-                if (is_mem)
-                        up_write(&cur->memory_lock);
-                else
-                        mutex_unlock(&cur->vma_lock);
-        }
+        list_for_each_entry_from_reverse(vdev, &dev_set->device_list,
+                                         vdev.dev_set_list)
+                up_write(&vdev->memory_lock);
 
-        list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list)
-                pm_runtime_put(&cur->pdev->dev);
+        list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list)
+                pm_runtime_put(&vdev->pdev->dev);
 
 err_unlock:
         mutex_unlock(&dev_set->lock);
         return ret;
...
drivers/vfio/vfio_main.c
@@ -22,8 +22,10 @@
 #include <linux/list.h>
 #include <linux/miscdevice.h>
 #include <linux/module.h>
+#include <linux/mount.h>
 #include <linux/mutex.h>
 #include <linux/pci.h>
+#include <linux/pseudo_fs.h>
 #include <linux/rwsem.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -43,9 +45,13 @@
 #define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
 #define DRIVER_DESC	"VFIO - User Level meta-driver"
 
+#define VFIO_MAGIC 0x5646494f /* "VFIO" */
+
 static struct vfio {
         struct class *device_class;
         struct ida device_ida;
+        struct vfsmount *vfs_mount;
+        int fs_count;
 } vfio;
 
 #ifdef CONFIG_VFIO_NOIOMMU
@@ -186,6 +192,8 @@ static void vfio_device_release(struct device *dev)
         if (device->ops->release)
                 device->ops->release(device);
 
+        iput(device->inode);
+        simple_release_fs(&vfio.vfs_mount, &vfio.fs_count);
         kvfree(device);
 }
@@ -228,6 +236,34 @@ struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
 }
 EXPORT_SYMBOL_GPL(_vfio_alloc_device);
 
+static int vfio_fs_init_fs_context(struct fs_context *fc)
+{
+        return init_pseudo(fc, VFIO_MAGIC) ? 0 : -ENOMEM;
+}
+
+static struct file_system_type vfio_fs_type = {
+        .name = "vfio",
+        .owner = THIS_MODULE,
+        .init_fs_context = vfio_fs_init_fs_context,
+        .kill_sb = kill_anon_super,
+};
+
+static struct inode *vfio_fs_inode_new(void)
+{
+        struct inode *inode;
+        int ret;
+
+        ret = simple_pin_fs(&vfio_fs_type, &vfio.vfs_mount, &vfio.fs_count);
+        if (ret)
+                return ERR_PTR(ret);
+
+        inode = alloc_anon_inode(vfio.vfs_mount->mnt_sb);
+        if (IS_ERR(inode))
+                simple_release_fs(&vfio.vfs_mount, &vfio.fs_count);
+
+        return inode;
+}
+
 /*
  * Initialize a vfio_device so it can be registered to vfio core.
  */
@@ -246,6 +282,11 @@ static int vfio_init_device(struct vfio_device *device, struct device *dev,
         init_completion(&device->comp);
         device->dev = dev;
         device->ops = ops;
+        device->inode = vfio_fs_inode_new();
+        if (IS_ERR(device->inode)) {
+                ret = PTR_ERR(device->inode);
+                goto out_inode;
+        }
 
         if (ops->init) {
                 ret = ops->init(device);
@@ -260,6 +301,9 @@ static int vfio_init_device(struct vfio_device *device, struct device *dev,
         return 0;
 
 out_uninit:
+        iput(device->inode);
+        simple_release_fs(&vfio.vfs_mount, &vfio.fs_count);
+out_inode:
         vfio_release_device_set(device);
         ida_free(&vfio.device_ida, device->index);
         return ret;
...
include/linux/vfio.h
@@ -64,6 +64,7 @@ struct vfio_device {
         struct completion comp;
         struct iommufd_access *iommufd_access;
         void (*put_kvm)(struct kvm *kvm);
+        struct inode *inode;
 #if IS_ENABLED(CONFIG_IOMMUFD)
         struct iommufd_device *iommufd_device;
         u8 iommufd_attached:1;
...
include/linux/vfio_pci_core.h
@@ -93,8 +93,6 @@ struct vfio_pci_core_device {
         struct list_head sriov_pfs_item;
         struct vfio_pci_core_device *sriov_pf_core_dev;
         struct notifier_block nb;
-        struct mutex vma_lock;
-        struct list_head vma_list;
         struct rw_semaphore memory_lock;
 };
...
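
For context on the offset arithmetic used by vma_to_pfn() and vfio_pci_zap_bars() above: vfio-pci encodes the region index in the high bits of the mmap offset, so vm_pgoff carries both the region index and the page offset within that region. The relevant definitions (from include/linux/vfio_pci_core.h, untouched by this series) are roughly:

  #define VFIO_PCI_OFFSET_SHIFT                   40
  #define VFIO_PCI_OFFSET_TO_INDEX(off)           ((off) >> VFIO_PCI_OFFSET_SHIFT)
  #define VFIO_PCI_INDEX_TO_OFFSET(index)         ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
  #define VFIO_PCI_OFFSET_MASK                    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)

This layout is why zapping BAR0 through the ROM region reduces to a single [start, len) hole punch on the device inode's address space.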