Commit b7c5e64f authored by Alex Williamson

vfio: Create vfio_fs_type with inode per device

By linking all the device fds we provide to userspace to an
address space through a new pseudo fs, we can use tools like
unmap_mapping_range() to zap all vmas associated with a device.
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20240530045236.1005864-2-alex.williamson@redhat.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
parent 1613e604
...@@ -39,6 +39,13 @@ int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep) ...@@ -39,6 +39,13 @@ int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep)
filep->private_data = df; filep->private_data = df;
/*
* Use the pseudo fs inode on the device to link all mmaps
* to the same address space, allowing us to unmap all vmas
* associated to this device using unmap_mapping_range().
*/
filep->f_mapping = device->inode->i_mapping;
return 0; return 0;
err_put_registration: err_put_registration:
......
...@@ -286,6 +286,13 @@ static struct file *vfio_device_open_file(struct vfio_device *device) ...@@ -286,6 +286,13 @@ static struct file *vfio_device_open_file(struct vfio_device *device)
*/ */
filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE); filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);
/*
* Use the pseudo fs inode on the device to link all mmaps
* to the same address space, allowing us to unmap all vmas
* associated to this device using unmap_mapping_range().
*/
filep->f_mapping = device->inode->i_mapping;
if (device->group->type == VFIO_NO_IOMMU) if (device->group->type == VFIO_NO_IOMMU)
dev_warn(device->dev, "vfio-noiommu device opened by user " dev_warn(device->dev, "vfio-noiommu device opened by user "
"(%s:%d)\n", current->comm, task_pid_nr(current)); "(%s:%d)\n", current->comm, task_pid_nr(current));
......
...@@ -22,8 +22,10 @@ ...@@ -22,8 +22,10 @@
#include <linux/list.h> #include <linux/list.h>
#include <linux/miscdevice.h> #include <linux/miscdevice.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/mount.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/pseudo_fs.h>
#include <linux/rwsem.h> #include <linux/rwsem.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/slab.h> #include <linux/slab.h>
...@@ -43,9 +45,13 @@ ...@@ -43,9 +45,13 @@
#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC "VFIO - User Level meta-driver" #define DRIVER_DESC "VFIO - User Level meta-driver"
#define VFIO_MAGIC 0x5646494f /* "VFIO" */
static struct vfio { static struct vfio {
struct class *device_class; struct class *device_class;
struct ida device_ida; struct ida device_ida;
struct vfsmount *vfs_mount;
int fs_count;
} vfio; } vfio;
#ifdef CONFIG_VFIO_NOIOMMU #ifdef CONFIG_VFIO_NOIOMMU
...@@ -186,6 +192,8 @@ static void vfio_device_release(struct device *dev) ...@@ -186,6 +192,8 @@ static void vfio_device_release(struct device *dev)
if (device->ops->release) if (device->ops->release)
device->ops->release(device); device->ops->release(device);
iput(device->inode);
simple_release_fs(&vfio.vfs_mount, &vfio.fs_count);
kvfree(device); kvfree(device);
} }
...@@ -228,6 +236,34 @@ struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev, ...@@ -228,6 +236,34 @@ struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
} }
EXPORT_SYMBOL_GPL(_vfio_alloc_device); EXPORT_SYMBOL_GPL(_vfio_alloc_device);
/*
 * fs_context initializer for the vfio pseudo filesystem.
 *
 * Sets up @fc as a pseudo filesystem tagged with VFIO_MAGIC.
 *
 * Return: 0 when init_pseudo() hands back a non-NULL context,
 * -ENOMEM otherwise.
 */
static int vfio_fs_init_fs_context(struct fs_context *fc)
{
	if (!init_pseudo(fc, VFIO_MAGIC))
		return -ENOMEM;

	return 0;
}
/*
 * Pseudo filesystem type used to back the per-device inodes handed out by
 * vfio_fs_inode_new(). Contexts are set up by vfio_fs_init_fs_context()
 * and superblocks are torn down with kill_anon_super().
 */
static struct file_system_type vfio_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "vfio",
	.init_fs_context	= vfio_fs_init_fs_context,
	.kill_sb		= kill_anon_super,
};
/*
 * Allocate a new anonymous inode on the vfio pseudo filesystem.
 *
 * A pin on the filesystem is taken through simple_pin_fs() (tracked in
 * vfio.vfs_mount / vfio.fs_count) before the inode is allocated from the
 * mount's superblock. If the allocation fails the pin is dropped again
 * before returning, so a failed call leaves no pin behind.
 *
 * Return: the new inode on success; on failure an ERR_PTR() carrying either
 * the simple_pin_fs() error or the error from alloc_anon_inode().
 */
static struct inode *vfio_fs_inode_new(void)
{
	struct inode *anon;
	int err;

	err = simple_pin_fs(&vfio_fs_type, &vfio.vfs_mount, &vfio.fs_count);
	if (err)
		return ERR_PTR(err);

	anon = alloc_anon_inode(vfio.vfs_mount->mnt_sb);
	if (!IS_ERR(anon))
		return anon;

	/* No inode was created: give back the pin taken above. */
	simple_release_fs(&vfio.vfs_mount, &vfio.fs_count);

	return anon;
}
/* /*
* Initialize a vfio_device so it can be registered to vfio core. * Initialize a vfio_device so it can be registered to vfio core.
*/ */
...@@ -246,6 +282,11 @@ static int vfio_init_device(struct vfio_device *device, struct device *dev, ...@@ -246,6 +282,11 @@ static int vfio_init_device(struct vfio_device *device, struct device *dev,
init_completion(&device->comp); init_completion(&device->comp);
device->dev = dev; device->dev = dev;
device->ops = ops; device->ops = ops;
device->inode = vfio_fs_inode_new();
if (IS_ERR(device->inode)) {
ret = PTR_ERR(device->inode);
goto out_inode;
}
if (ops->init) { if (ops->init) {
ret = ops->init(device); ret = ops->init(device);
...@@ -260,6 +301,9 @@ static int vfio_init_device(struct vfio_device *device, struct device *dev, ...@@ -260,6 +301,9 @@ static int vfio_init_device(struct vfio_device *device, struct device *dev,
return 0; return 0;
out_uninit: out_uninit:
iput(device->inode);
simple_release_fs(&vfio.vfs_mount, &vfio.fs_count);
out_inode:
vfio_release_device_set(device); vfio_release_device_set(device);
ida_free(&vfio.device_ida, device->index); ida_free(&vfio.device_ida, device->index);
return ret; return ret;
......
...@@ -64,6 +64,7 @@ struct vfio_device { ...@@ -64,6 +64,7 @@ struct vfio_device {
struct completion comp; struct completion comp;
struct iommufd_access *iommufd_access; struct iommufd_access *iommufd_access;
void (*put_kvm)(struct kvm *kvm); void (*put_kvm)(struct kvm *kvm);
struct inode *inode;
#if IS_ENABLED(CONFIG_IOMMUFD) #if IS_ENABLED(CONFIG_IOMMUFD)
struct iommufd_device *iommufd_device; struct iommufd_device *iommufd_device;
u8 iommufd_attached:1; u8 iommufd_attached:1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment