Commit 8b6f173a, authored by Yi Liu and committed by Alex Williamson

vfio: Add cdev for vfio_device

This adds cdev support for vfio_device. It allows the user to open a vfio
device directly, without using the legacy container/group interface. This is
a prerequisite for supporting new IOMMU features such as nested translation.

The device fd opened in this manner cannot access the device yet, because the
fops open() does not open the device; that only happens after a successful
VFIO_DEVICE_BIND_IOMMUFD ioctl, which will be added in a later patch.

With this patch, devices registered to vfio core would have both the legacy
group and the new device interfaces created.

- group interface : /dev/vfio/$groupID
- device interface: /dev/vfio/devices/vfioX - normal device
		    ("X" is a unique number across vfio devices)

For a given device, the user can identify the matching vfioX by searching
the vfio-dev folder under the sysfs path of the device. Take PCI device
(0000:6a:01.0) as an example, /sys/bus/pci/devices/0000\:6a\:01.0/vfio-dev/vfioX
implies the matching vfioX under /dev/vfio/devices/, and vfio-dev/vfioX/dev
contains the major:minor number of the matching /dev/vfio/devices/vfioX.
The user can get device fd by opening the /dev/vfio/devices/vfioX.

The vfio_device cdev logic in this patch:
*) The __vfio_register_dev() path ends up doing cdev_device_add() for each
   vfio_device if VFIO_DEVICE_CDEV is configured.
*) The vfio_unregister_group_dev() path does cdev_device_del().

cdev interface does not support noiommu devices, so VFIO only creates the
legacy group interface for the physical devices that do not have IOMMU.
noiommu users should use the legacy group interface.
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Terrence Xu <terrence.xu@intel.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Yanting Jiang <yanting.jiang@intel.com>
Tested-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Tested-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Link: https://lore.kernel.org/r/20230718135551.6592-19-yi.l.liu@intel.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
parent 38c24544
...@@ -12,6 +12,18 @@ menuconfig VFIO ...@@ -12,6 +12,18 @@ menuconfig VFIO
If you don't know what to do here, say N. If you don't know what to do here, say N.
if VFIO if VFIO
# Optional per-device character device interface (/dev/vfio/devices/vfioX).
# Only selectable when IOMMUFD is enabled and SPAPR_TCE_IOMMU is not.
config VFIO_DEVICE_CDEV
bool "Support for the VFIO cdev /dev/vfio/devices/vfioX"
depends on IOMMUFD && !SPAPR_TCE_IOMMU
help
The VFIO device cdev is another way for userspace to get device
access. Userspace gets device fd by opening device cdev under
/dev/vfio/devices/vfioX, and then bind the device fd with an iommufd
to set up secure DMA context for device access. This interface does
not support noiommu.
If you don't know what to do here, say N.
config VFIO_CONTAINER config VFIO_CONTAINER
bool "Support for the VFIO container /dev/vfio/vfio" bool "Support for the VFIO container /dev/vfio/vfio"
select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64) select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
......
...@@ -4,6 +4,7 @@ obj-$(CONFIG_VFIO) += vfio.o ...@@ -4,6 +4,7 @@ obj-$(CONFIG_VFIO) += vfio.o
vfio-y += vfio_main.o \ vfio-y += vfio_main.o \
group.o \ group.o \
iova_bitmap.o iova_bitmap.o
vfio-$(CONFIG_VFIO_DEVICE_CDEV) += device_cdev.o
vfio-$(CONFIG_IOMMUFD) += iommufd.o vfio-$(CONFIG_IOMMUFD) += iommufd.o
vfio-$(CONFIG_VFIO_CONTAINER) += container.o vfio-$(CONFIG_VFIO_CONTAINER) += container.o
vfio-$(CONFIG_VFIO_VIRQFD) += virqfd.o vfio-$(CONFIG_VFIO_VIRQFD) += virqfd.o
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2023 Intel Corporation.
*/
#include <linux/vfio.h>
#include "vfio.h"
/*
 * First dev_t of the char device region allocated by vfio_cdev_init() and
 * released by vfio_cdev_cleanup(). vfio_init_device_cdev() combines its
 * major number with the per-device index to build each device's dev_t.
 */
static dev_t device_devt;
void vfio_init_device_cdev(struct vfio_device *device)
{
device->device.devt = MKDEV(MAJOR(device_devt), device->index);
cdev_init(&device->cdev, &vfio_device_fops);
device->cdev.owner = THIS_MODULE;
}
/*
 * open() handler for the vfio device cdev.
 *
 * Takes a registration reference on the vfio_device that embeds the
 * inode's cdev, allocates a vfio_device_file for it and stores that in
 * @filep->private_data.
 *
 * Device access through the resulting fd stays blocked until
 * .open_device() has been called successfully during BIND_IOMMUFD.
 *
 * Return: 0 on success, -ENODEV if the registration reference cannot be
 * taken, or the error encoded by vfio_allocate_device_file().
 */
int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep)
{
	struct vfio_device_file *df;
	struct vfio_device *device;

	device = container_of(inode->i_cdev, struct vfio_device, cdev);

	/* Paired with the put in vfio_device_fops_release() */
	if (!vfio_device_try_get_registration(device))
		return -ENODEV;

	df = vfio_allocate_device_file(device);
	if (IS_ERR(df)) {
		/* Nothing was handed to the file: drop the reference again. */
		vfio_device_put_registration(device);
		return PTR_ERR(df);
	}

	filep->private_data = df;
	return 0;
}
/*
 * devnode callback installed on the vfio device class by vfio_cdev_init().
 *
 * Builds the node name "vfio/devices/<device name>" in a freshly
 * allocated string and returns kasprintf()'s result as-is. @mode is
 * left untouched.
 */
static char *vfio_device_devnode(const struct device *dev, umode_t *mode)
{
	const char *name = dev_name(dev);

	return kasprintf(GFP_KERNEL, "vfio/devices/%s", name);
}
/*
 * Set up the cdev infrastructure shared by all vfio devices.
 *
 * Installs vfio_device_devnode() as the devnode callback of
 * @device_class, then allocates a char device region of MINORMASK + 1
 * minors starting at minor 0, named "vfio-dev", into device_devt.
 *
 * Return: the result of alloc_chrdev_region().
 */
int vfio_cdev_init(struct class *device_class)
{
	int ret;

	device_class->devnode = vfio_device_devnode;

	ret = alloc_chrdev_region(&device_devt, 0, MINORMASK + 1, "vfio-dev");
	return ret;
}
/*
 * Release the char device region allocated by vfio_cdev_init().
 * The count must match the MINORMASK + 1 minors requested there.
 */
void vfio_cdev_cleanup(void)
{
	const unsigned int nr_minors = MINORMASK + 1;

	unregister_chrdev_region(device_devt, nr_minors);
}
...@@ -266,6 +266,60 @@ vfio_iommufd_compat_attach_ioas(struct vfio_device *device, ...@@ -266,6 +266,60 @@ vfio_iommufd_compat_attach_ioas(struct vfio_device *device,
} }
#endif #endif
#if IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV)
void vfio_init_device_cdev(struct vfio_device *device);
/*
 * Add @device to the driver core, together with its cdev when the device
 * is not a noiommu device.
 *
 * Return: the result of cdev_device_add(), or of device_add() for
 * noiommu devices.
 */
static inline int vfio_device_add(struct vfio_device *device)
{
	if (!vfio_device_is_noiommu(device)) {
		vfio_init_device_cdev(device);
		return cdev_device_add(&device->cdev, &device->device);
	}

	/* cdev does not support noiommu device */
	return device_add(&device->device);
}
/*
 * Undo vfio_device_add(): remove the cdev and the device together, or
 * only the device when it is a noiommu device (which never had a cdev
 * added by vfio_device_add()).
 */
static inline void vfio_device_del(struct vfio_device *device)
{
	if (!vfio_device_is_noiommu(device)) {
		cdev_device_del(&device->cdev, &device->device);
		return;
	}

	device_del(&device->device);
}
int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep);
int vfio_cdev_init(struct class *device_class);
void vfio_cdev_cleanup(void);
#else
/* Stub used when CONFIG_VFIO_DEVICE_CDEV is disabled: nothing to set up. */
static inline void vfio_init_device_cdev(struct vfio_device *device)
{
}
/*
 * Variant used when CONFIG_VFIO_DEVICE_CDEV is disabled: only the struct
 * device is added, no cdev is involved.
 *
 * Return: the result of device_add().
 */
static inline int vfio_device_add(struct vfio_device *device)
{
	struct device *dev = &device->device;

	return device_add(dev);
}
/*
 * Variant used when CONFIG_VFIO_DEVICE_CDEV is disabled: balances the
 * plain device_add() done by vfio_device_add().
 */
static inline void vfio_device_del(struct vfio_device *device)
{
	struct device *dev = &device->device;

	device_del(dev);
}
/*
 * Stub used when CONFIG_VFIO_DEVICE_CDEV is disabled. It does nothing and
 * reports success.
 * NOTE(review): presumably never reached in this configuration because no
 * cdev is added for the device - confirm against the fops users.
 */
static inline int vfio_device_fops_cdev_open(struct inode *inode,
struct file *filep)
{
return 0;
}
/*
 * Stub used when CONFIG_VFIO_DEVICE_CDEV is disabled: @device_class is
 * left untouched and no char device region is allocated.
 *
 * Return: always 0.
 */
static inline int vfio_cdev_init(struct class *device_class)
{
return 0;
}
/* Stub used when CONFIG_VFIO_DEVICE_CDEV is disabled: nothing to release. */
static inline void vfio_cdev_cleanup(void)
{
}
#endif /* CONFIG_VFIO_DEVICE_CDEV */
#if IS_ENABLED(CONFIG_VFIO_VIRQFD) #if IS_ENABLED(CONFIG_VFIO_VIRQFD)
int __init vfio_virqfd_init(void); int __init vfio_virqfd_init(void);
void vfio_virqfd_exit(void); void vfio_virqfd_exit(void);
......
...@@ -292,7 +292,7 @@ static int __vfio_register_dev(struct vfio_device *device, ...@@ -292,7 +292,7 @@ static int __vfio_register_dev(struct vfio_device *device,
if (ret) if (ret)
return ret; return ret;
ret = device_add(&device->device); ret = vfio_device_add(device);
if (ret) if (ret)
goto err_out; goto err_out;
...@@ -338,8 +338,11 @@ void vfio_unregister_group_dev(struct vfio_device *device) ...@@ -338,8 +338,11 @@ void vfio_unregister_group_dev(struct vfio_device *device)
*/ */
vfio_device_group_unregister(device); vfio_device_group_unregister(device);
/* Balances device_add in register path */ /*
device_del(&device->device); * Balances vfio_device_add() in register path, also prevents
* new device opened by userspace in the cdev path.
*/
vfio_device_del(device);
vfio_device_put_registration(device); vfio_device_put_registration(device);
rc = try_wait_for_completion(&device->comp); rc = try_wait_for_completion(&device->comp);
...@@ -567,7 +570,8 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) ...@@ -567,7 +570,8 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
struct vfio_device_file *df = filep->private_data; struct vfio_device_file *df = filep->private_data;
struct vfio_device *device = df->device; struct vfio_device *device = df->device;
vfio_df_group_close(df); if (df->group)
vfio_df_group_close(df);
vfio_device_put_registration(device); vfio_device_put_registration(device);
...@@ -1216,6 +1220,7 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) ...@@ -1216,6 +1220,7 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
const struct file_operations vfio_device_fops = { const struct file_operations vfio_device_fops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.open = vfio_device_fops_cdev_open,
.release = vfio_device_fops_release, .release = vfio_device_fops_release,
.read = vfio_device_fops_read, .read = vfio_device_fops_read,
.write = vfio_device_fops_write, .write = vfio_device_fops_write,
...@@ -1567,9 +1572,16 @@ static int __init vfio_init(void) ...@@ -1567,9 +1572,16 @@ static int __init vfio_init(void)
goto err_dev_class; goto err_dev_class;
} }
ret = vfio_cdev_init(vfio.device_class);
if (ret)
goto err_alloc_dev_chrdev;
pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
return 0; return 0;
err_alloc_dev_chrdev:
class_destroy(vfio.device_class);
vfio.device_class = NULL;
err_dev_class: err_dev_class:
vfio_virqfd_exit(); vfio_virqfd_exit();
err_virqfd: err_virqfd:
...@@ -1580,6 +1592,7 @@ static int __init vfio_init(void) ...@@ -1580,6 +1592,7 @@ static int __init vfio_init(void)
static void __exit vfio_cleanup(void) static void __exit vfio_cleanup(void)
{ {
ida_destroy(&vfio.device_ida); ida_destroy(&vfio.device_ida);
vfio_cdev_cleanup();
class_destroy(vfio.device_class); class_destroy(vfio.device_class);
vfio.device_class = NULL; vfio.device_class = NULL;
vfio_virqfd_exit(); vfio_virqfd_exit();
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/poll.h> #include <linux/poll.h>
#include <linux/cdev.h>
#include <uapi/linux/vfio.h> #include <uapi/linux/vfio.h>
#include <linux/iova_bitmap.h> #include <linux/iova_bitmap.h>
...@@ -51,6 +52,9 @@ struct vfio_device { ...@@ -51,6 +52,9 @@ struct vfio_device {
/* Members below here are private, not for driver use */ /* Members below here are private, not for driver use */
unsigned int index; unsigned int index;
struct device device; /* device.kref covers object life circle */ struct device device; /* device.kref covers object life circle */
#if IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV)
struct cdev cdev;
#endif
refcount_t refcount; /* user count on registered device*/ refcount_t refcount; /* user count on registered device*/
unsigned int open_count; unsigned int open_count;
struct completion comp; struct completion comp;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment