Commit 8cfc5b60 authored by Jason Gunthorpe, committed by Alex Williamson

vfio: Replace the iommu notifier with a device list

Instead of bouncing the function call to the driver op through a blocking
notifier just have the iommu layer call it directly.

Register each device that is being attached to the iommu with the lower
driver which then threads them on a linked list and calls the appropriate
driver op at the right time.

Currently the only use is if dma_unmap() is defined.

Also, fully lock the debugging tests on the pinning path that check whether
a dma_unmap handler is registered.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/2-v4-681e038e30fd+78-vfio_unmap_notif_jgg@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
parent ce4b4657
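
For drivers, the visible contract is the one the parent commit already established: an emulated-IOMMU (mdev-style) driver opts in by supplying a dma_unmap callback in its vfio_device_ops, and the IOMMU layer now invokes that op directly for every mapping being torn down instead of bouncing through a blocking notifier. Below is a minimal sketch of such an op; the my_mdev_* names and the invalidation helper are hypothetical placeholders, only the dma_unmap signature and the vfio_device_ops field are taken from the in-tree headers.

#include <linux/vfio.h>

/* Hypothetical driver-private state wrapping the vfio_device. */
struct my_mdev_state {
	struct vfio_device vdev;
	/* ... driver pinning bookkeeping ... */
};

/*
 * Called directly by the type1 driver (walking iommu->device_list) for each
 * vfio_dma about to be unmapped.  The driver must vfio_unpin_pages()
 * everything it has pinned inside [iova, iova + length).
 */
static void my_mdev_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
{
	struct my_mdev_state *st =
		container_of(vdev, struct my_mdev_state, vdev);

	/* Hypothetical helper that invalidates and unpins the range. */
	my_mdev_invalidate_and_unpin(st, iova, length);
}

static const struct vfio_device_ops my_mdev_dev_ops = {
	.name		= "my-mdev",
	/* open_device, close_device, ioctl, ... elided */
	.dma_unmap	= my_mdev_dma_unmap,
};

Only devices whose ops provide dma_unmap end up on iommu->device_list; vfio_iommu_type1_register_device() returns early for everything else, so drivers without the op add no cost to the unmap path.
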
@@ -231,7 +231,7 @@ int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
 {
 	struct vfio_iommu_driver *driver, *tmp;
 
-	if (WARN_ON(!ops->register_notifier != !ops->unregister_notifier))
+	if (WARN_ON(!ops->register_device != !ops->unregister_device))
 		return -EINVAL;
 
 	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
@@ -1082,17 +1082,6 @@ static void vfio_device_unassign_container(struct vfio_device *device)
 	up_write(&device->group->group_rwsem);
 }
 
-static int vfio_iommu_notifier(struct notifier_block *nb, unsigned long action,
-			       void *data)
-{
-	struct vfio_device *vfio_device =
-		container_of(nb, struct vfio_device, iommu_nb);
-	struct vfio_iommu_type1_dma_unmap *unmap = data;
-
-	vfio_device->ops->dma_unmap(vfio_device, unmap->iova, unmap->size);
-	return NOTIFY_OK;
-}
-
 static struct file *vfio_device_open(struct vfio_device *device)
 {
 	struct vfio_iommu_driver *iommu_driver;
@@ -1128,15 +1117,9 @@ static struct file *vfio_device_open(struct vfio_device *device)
 		}
 
 		iommu_driver = device->group->container->iommu_driver;
-		if (device->ops->dma_unmap && iommu_driver &&
-		    iommu_driver->ops->register_notifier) {
-			unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
-
-			device->iommu_nb.notifier_call = vfio_iommu_notifier;
-			iommu_driver->ops->register_notifier(
-				device->group->container->iommu_data, &events,
-				&device->iommu_nb);
-		}
+		if (iommu_driver && iommu_driver->ops->register_device)
+			iommu_driver->ops->register_device(
+				device->group->container->iommu_data, device);
 
 		up_read(&device->group->group_rwsem);
 	}
@@ -1176,11 +1159,9 @@ static struct file *vfio_device_open(struct vfio_device *device)
 		device->ops->close_device(device);
 
 		iommu_driver = device->group->container->iommu_driver;
-		if (device->ops->dma_unmap && iommu_driver &&
-		    iommu_driver->ops->unregister_notifier)
-			iommu_driver->ops->unregister_notifier(
-				device->group->container->iommu_data,
-				&device->iommu_nb);
+		if (iommu_driver && iommu_driver->ops->unregister_device)
+			iommu_driver->ops->unregister_device(
+				device->group->container->iommu_data, device);
 	}
 err_undo_count:
 	up_read(&device->group->group_rwsem);
@@ -1385,11 +1366,9 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
 		device->ops->close_device(device);
 
 	iommu_driver = device->group->container->iommu_driver;
-	if (device->ops->dma_unmap && iommu_driver &&
-	    iommu_driver->ops->unregister_notifier)
-		iommu_driver->ops->unregister_notifier(
-			device->group->container->iommu_data,
-			&device->iommu_nb);
+	if (iommu_driver && iommu_driver->ops->unregister_device)
+		iommu_driver->ops->unregister_device(
+			device->group->container->iommu_data, device);
 	up_read(&device->group->group_rwsem);
 	device->open_count--;
 	if (device->open_count == 0)
@@ -33,9 +33,6 @@ enum vfio_iommu_notify_type {
 	VFIO_IOMMU_CONTAINER_CLOSE = 0,
 };
 
-/* events for register_notifier() */
-#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
-
 /**
  * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
  */
@@ -58,11 +55,10 @@ struct vfio_iommu_driver_ops {
 				     unsigned long *phys_pfn);
 	int		(*unpin_pages)(void *iommu_data,
 				       unsigned long *user_pfn, int npage);
-	int		(*register_notifier)(void *iommu_data,
-					     unsigned long *events,
-					     struct notifier_block *nb);
-	int		(*unregister_notifier)(void *iommu_data,
-					       struct notifier_block *nb);
+	void		(*register_device)(void *iommu_data,
+					   struct vfio_device *vdev);
+	void		(*unregister_device)(void *iommu_data,
+					     struct vfio_device *vdev);
 	int		(*dma_rw)(void *iommu_data, dma_addr_t user_iova,
 				  void *data, size_t count, bool write);
 	struct iommu_domain *(*group_iommu_domain)(void *iommu_data,
@@ -67,7 +67,8 @@ struct vfio_iommu {
 	struct list_head	iova_list;
 	struct mutex		lock;
 	struct rb_root		dma_list;
-	struct blocking_notifier_head notifier;
+	struct list_head	device_list;
+	struct mutex		device_list_lock;
 	unsigned int		dma_avail;
 	unsigned int		vaddr_invalid_count;
 	uint64_t		pgsize_bitmap;
@@ -865,8 +866,8 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
 		}
 	}
 
-	/* Fail if notifier list is empty */
-	if (!iommu->notifier.head) {
+	/* Fail if no dma_unmap notifier is registered */
+	if (list_empty(&iommu->device_list)) {
 		ret = -EINVAL;
 		goto pin_done;
 	}
@@ -1287,6 +1288,35 @@ static int verify_bitmap_size(uint64_t npages, uint64_t bitmap_size)
 	return 0;
 }
 
+/*
+ * Notify VFIO drivers using vfio_register_emulated_iommu_dev() to invalidate
+ * and unmap iovas within the range we're about to unmap. Drivers MUST unpin
+ * pages in response to an invalidation.
+ */
+static void vfio_notify_dma_unmap(struct vfio_iommu *iommu,
+				  struct vfio_dma *dma)
+{
+	struct vfio_device *device;
+
+	if (list_empty(&iommu->device_list))
+		return;
+
+	/*
+	 * The device is expected to call vfio_unpin_pages() for any IOVA it has
+	 * pinned within the range. Since vfio_unpin_pages() will eventually
+	 * call back down to this code and try to obtain the iommu->lock we must
+	 * drop it.
+	 */
+	mutex_lock(&iommu->device_list_lock);
+	mutex_unlock(&iommu->lock);
+
+	list_for_each_entry(device, &iommu->device_list, iommu_entry)
+		device->ops->dma_unmap(device, dma->iova, dma->size);
+
+	mutex_unlock(&iommu->device_list_lock);
+	mutex_lock(&iommu->lock);
+}
+
 static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 			     struct vfio_iommu_type1_dma_unmap *unmap,
 			     struct vfio_bitmap *bitmap)
@@ -1400,8 +1430,6 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 		}
 
 		if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
-			struct vfio_iommu_type1_dma_unmap nb_unmap;
-
 			if (dma_last == dma) {
 				BUG_ON(++retries > 10);
 			} else {
@@ -1409,20 +1437,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 				retries = 0;
 			}
 
-			nb_unmap.iova = dma->iova;
-			nb_unmap.size = dma->size;
-
-			/*
-			 * Notify anyone (mdev vendor drivers) to invalidate and
-			 * unmap iovas within the range we're about to unmap.
-			 * Vendor drivers MUST unpin pages in response to an
-			 * invalidation.
-			 */
-			mutex_unlock(&iommu->lock);
-			blocking_notifier_call_chain(&iommu->notifier,
-						    VFIO_IOMMU_NOTIFY_DMA_UNMAP,
-						    &nb_unmap);
-			mutex_lock(&iommu->lock);
+			vfio_notify_dma_unmap(iommu, dma);
 			goto again;
 		}
 
@@ -2475,7 +2490,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
 
 		if (list_empty(&iommu->emulated_iommu_groups) &&
 		    list_empty(&iommu->domain_list)) {
-			WARN_ON(iommu->notifier.head);
+			WARN_ON(!list_empty(&iommu->device_list));
 			vfio_iommu_unmap_unpin_all(iommu);
 		}
 		goto detach_group_done;
@@ -2507,7 +2522,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
 		if (list_empty(&domain->group_list)) {
 			if (list_is_singular(&iommu->domain_list)) {
 				if (list_empty(&iommu->emulated_iommu_groups)) {
-					WARN_ON(iommu->notifier.head);
+					WARN_ON(!list_empty(
+							&iommu->device_list));
 					vfio_iommu_unmap_unpin_all(iommu);
 				} else {
 					vfio_iommu_unmap_unpin_reaccount(iommu);
@@ -2568,7 +2584,8 @@ static void *vfio_iommu_type1_open(unsigned long arg)
 	iommu->dma_avail = dma_entry_limit;
 	iommu->container_open = true;
 	mutex_init(&iommu->lock);
-	BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
+	mutex_init(&iommu->device_list_lock);
+	INIT_LIST_HEAD(&iommu->device_list);
 	init_waitqueue_head(&iommu->vaddr_wait);
 	iommu->pgsize_bitmap = PAGE_MASK;
 	INIT_LIST_HEAD(&iommu->emulated_iommu_groups);
@@ -3005,28 +3022,40 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
 	}
 }
 
-static int vfio_iommu_type1_register_notifier(void *iommu_data,
-					      unsigned long *events,
-					      struct notifier_block *nb)
+static void vfio_iommu_type1_register_device(void *iommu_data,
+					     struct vfio_device *vdev)
 {
 	struct vfio_iommu *iommu = iommu_data;
 
-	/* clear known events */
-	*events &= ~VFIO_IOMMU_NOTIFY_DMA_UNMAP;
-
-	/* refuse to register if still events remaining */
-	if (*events)
-		return -EINVAL;
-
-	return blocking_notifier_chain_register(&iommu->notifier, nb);
+	if (!vdev->ops->dma_unmap)
+		return;
+
+	/*
+	 * list_empty(&iommu->device_list) is tested under the iommu->lock while
+	 * iteration for dma_unmap must be done under the device_list_lock.
+	 * Holding both locks here allows avoiding the device_list_lock in
+	 * several fast paths. See vfio_notify_dma_unmap()
+	 */
+	mutex_lock(&iommu->lock);
+	mutex_lock(&iommu->device_list_lock);
+	list_add(&vdev->iommu_entry, &iommu->device_list);
+	mutex_unlock(&iommu->device_list_lock);
+	mutex_unlock(&iommu->lock);
 }
 
-static int vfio_iommu_type1_unregister_notifier(void *iommu_data,
-						struct notifier_block *nb)
+static void vfio_iommu_type1_unregister_device(void *iommu_data,
+					       struct vfio_device *vdev)
 {
 	struct vfio_iommu *iommu = iommu_data;
 
-	return blocking_notifier_chain_unregister(&iommu->notifier, nb);
+	if (!vdev->ops->dma_unmap)
+		return;
+
+	mutex_lock(&iommu->lock);
+	mutex_lock(&iommu->device_list_lock);
+	list_del(&vdev->iommu_entry);
+	mutex_unlock(&iommu->device_list_lock);
+	mutex_unlock(&iommu->lock);
 }
 
 static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
@@ -3160,8 +3189,8 @@ static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
 	.detach_group		= vfio_iommu_type1_detach_group,
 	.pin_pages		= vfio_iommu_type1_pin_pages,
 	.unpin_pages		= vfio_iommu_type1_unpin_pages,
-	.register_notifier	= vfio_iommu_type1_register_notifier,
-	.unregister_notifier	= vfio_iommu_type1_unregister_notifier,
+	.register_device	= vfio_iommu_type1_register_device,
+	.unregister_device	= vfio_iommu_type1_unregister_device,
	.dma_rw			= vfio_iommu_type1_dma_rw,
 	.group_iommu_domain	= vfio_iommu_type1_group_iommu_domain,
 	.notify			= vfio_iommu_type1_notify,
@@ -49,7 +49,7 @@ struct vfio_device {
 	unsigned int open_count;
 	struct completion comp;
 	struct list_head group_next;
-	struct notifier_block iommu_nb;
+	struct list_head iommu_entry;
 };
 
 /**