Commit 68132b35 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'vfio-v6.10-rc4' of https://github.com/awilliam/linux-vfio

Pull VFIO fixes from Alex Williamson:
 "Fix long standing lockdep issue of using remap_pfn_range() from the
  vfio-pci fault handler for mapping device MMIO. Commit ba168b52
  ("mm: use rwsem assertion macros for mmap_lock") now exposes this as a
  warning forcing this to be addressed.

  remap_pfn_range() was used here to efficiently map the entire vma, but
  it really never should have been used in the fault handler and doesn't
  handle concurrency, which introduced complex locking. We also needed
  to track vmas mapping the device memory in order to zap those vmas
  when the memory is disabled resulting in a vma list.

  Instead of all that mess, setup an address space on the device fd
  such that we can use unmap_mapping_range() for zapping to avoid the
  tracking overhead and use the standard vmf_insert_pfn() to insert
  mappings on fault.

  For now we'll iterate the vma and opportunistically try to insert
  mappings for the entire vma. This aligns with typical use cases, but
  hopefully in the future we can drop the iterative approach and make
  use of huge_fault instead, once vmf_insert_pfn{pud,pmd}() learn to
  handle pfnmaps"

* tag 'vfio-v6.10-rc4' of https://github.com/awilliam/linux-vfio:
  vfio/pci: Insert full vma on mmap'd MMIO fault
  vfio/pci: Use unmap_mapping_range()
  vfio: Create vfio_fs_type with inode per device
parents c286c21f d71a989c
......@@ -39,6 +39,13 @@ int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep)
filep->private_data = df;
/*
* Use the pseudo fs inode on the device to link all mmaps
* to the same address space, allowing us to unmap all vmas
* associated to this device using unmap_mapping_range().
*/
filep->f_mapping = device->inode->i_mapping;
return 0;
err_put_registration:
......
......@@ -286,6 +286,13 @@ static struct file *vfio_device_open_file(struct vfio_device *device)
*/
filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);
/*
* Use the pseudo fs inode on the device to link all mmaps
* to the same address space, allowing us to unmap all vmas
* associated to this device using unmap_mapping_range().
*/
filep->f_mapping = device->inode->i_mapping;
if (device->group->type == VFIO_NO_IOMMU)
dev_warn(device->dev, "vfio-noiommu device opened by user "
"(%s:%d)\n", current->comm, task_pid_nr(current));
......
This diff is collapsed.
......@@ -22,8 +22,10 @@
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/pseudo_fs.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
......@@ -43,9 +45,13 @@
#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC "VFIO - User Level meta-driver"
#define VFIO_MAGIC 0x5646494f /* "VFIO" */
static struct vfio {
struct class *device_class;
struct ida device_ida;
struct vfsmount *vfs_mount;
int fs_count;
} vfio;
#ifdef CONFIG_VFIO_NOIOMMU
......@@ -186,6 +192,8 @@ static void vfio_device_release(struct device *dev)
if (device->ops->release)
device->ops->release(device);
iput(device->inode);
simple_release_fs(&vfio.vfs_mount, &vfio.fs_count);
kvfree(device);
}
......@@ -228,6 +236,34 @@ struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
}
EXPORT_SYMBOL_GPL(_vfio_alloc_device);
static int vfio_fs_init_fs_context(struct fs_context *fc)
{
return init_pseudo(fc, VFIO_MAGIC) ? 0 : -ENOMEM;
}
static struct file_system_type vfio_fs_type = {
.name = "vfio",
.owner = THIS_MODULE,
.init_fs_context = vfio_fs_init_fs_context,
.kill_sb = kill_anon_super,
};
static struct inode *vfio_fs_inode_new(void)
{
struct inode *inode;
int ret;
ret = simple_pin_fs(&vfio_fs_type, &vfio.vfs_mount, &vfio.fs_count);
if (ret)
return ERR_PTR(ret);
inode = alloc_anon_inode(vfio.vfs_mount->mnt_sb);
if (IS_ERR(inode))
simple_release_fs(&vfio.vfs_mount, &vfio.fs_count);
return inode;
}
/*
* Initialize a vfio_device so it can be registered to vfio core.
*/
......@@ -246,6 +282,11 @@ static int vfio_init_device(struct vfio_device *device, struct device *dev,
init_completion(&device->comp);
device->dev = dev;
device->ops = ops;
device->inode = vfio_fs_inode_new();
if (IS_ERR(device->inode)) {
ret = PTR_ERR(device->inode);
goto out_inode;
}
if (ops->init) {
ret = ops->init(device);
......@@ -260,6 +301,9 @@ static int vfio_init_device(struct vfio_device *device, struct device *dev,
return 0;
out_uninit:
iput(device->inode);
simple_release_fs(&vfio.vfs_mount, &vfio.fs_count);
out_inode:
vfio_release_device_set(device);
ida_free(&vfio.device_ida, device->index);
return ret;
......
......@@ -64,6 +64,7 @@ struct vfio_device {
struct completion comp;
struct iommufd_access *iommufd_access;
void (*put_kvm)(struct kvm *kvm);
struct inode *inode;
#if IS_ENABLED(CONFIG_IOMMUFD)
struct iommufd_device *iommufd_device;
u8 iommufd_attached:1;
......
......@@ -93,8 +93,6 @@ struct vfio_pci_core_device {
struct list_head sriov_pfs_item;
struct vfio_pci_core_device *sriov_pf_core_dev;
struct notifier_block nb;
struct mutex vma_lock;
struct list_head vma_list;
struct rw_semaphore memory_lock;
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment