Commit 1ddd0027 authored by Linus Torvalds

Merge tag 'vfio-v5.4-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - Fix spapr iommu error case (Alexey Kardashevskiy)

 - Consolidate region type definitions (Cornelia Huck)

 - Restore saved original PCI state on release (hexin)

 - Simplify mtty sample driver interrupt path (Parav Pandit)

 - Support for reporting valid IOVA regions to user (Shameer Kolothum)

* tag 'vfio-v5.4-rc1' of git://github.com/awilliam/linux-vfio:
  vfio_pci: Restore original state on release
  vfio/type1: remove duplicate retrieval of reserved regions
  vfio/type1: Add IOVA range capability support
  vfio/type1: check dma map request is within a valid iova range
  vfio/spapr_tce: Fix incorrect tce_iommu_group memory free
  vfio-mdev/mtty: Simplify interrupt generation
  vfio: re-arrange vfio region definitions
  vfio/type1: Update iova list on detach
  vfio/type1: Check reserved region conflict and update iova list
  vfio/type1: Introduce iova list and add iommu aperture validity check
parents 45824fc0 e6c5d727
...@@ -438,11 +438,20 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) ...@@ -438,11 +438,20 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
/* /*
* Try to reset the device. The success of this is dependent on * Try to get the locks ourselves to prevent a deadlock. The
* being able to lock the device, which is not always possible. * success of this is dependent on being able to lock the device,
* which is not always possible.
* We can not use the "try" reset interface here, which will
* overwrite the previously restored configuration information.
*/ */
if (vdev->reset_works && !pci_try_reset_function(pdev)) if (vdev->reset_works && pci_cfg_access_trylock(pdev)) {
if (device_trylock(&pdev->dev)) {
if (!__pci_reset_function_locked(pdev))
vdev->needs_reset = false; vdev->needs_reset = false;
device_unlock(&pdev->dev);
}
pci_cfg_access_unlock(pdev);
}
pci_restore_state(pdev); pci_restore_state(pdev);
out: out:
......
...@@ -1240,7 +1240,7 @@ static long tce_iommu_take_ownership_ddw(struct tce_container *container, ...@@ -1240,7 +1240,7 @@ static long tce_iommu_take_ownership_ddw(struct tce_container *container,
static int tce_iommu_attach_group(void *iommu_data, static int tce_iommu_attach_group(void *iommu_data,
struct iommu_group *iommu_group) struct iommu_group *iommu_group)
{ {
int ret; int ret = 0;
struct tce_container *container = iommu_data; struct tce_container *container = iommu_data;
struct iommu_table_group *table_group; struct iommu_table_group *table_group;
struct tce_iommu_group *tcegrp = NULL; struct tce_iommu_group *tcegrp = NULL;
...@@ -1293,13 +1293,13 @@ static int tce_iommu_attach_group(void *iommu_data, ...@@ -1293,13 +1293,13 @@ static int tce_iommu_attach_group(void *iommu_data,
!table_group->ops->release_ownership) { !table_group->ops->release_ownership) {
if (container->v2) { if (container->v2) {
ret = -EPERM; ret = -EPERM;
goto unlock_exit; goto free_exit;
} }
ret = tce_iommu_take_ownership(container, table_group); ret = tce_iommu_take_ownership(container, table_group);
} else { } else {
if (!container->v2) { if (!container->v2) {
ret = -EPERM; ret = -EPERM;
goto unlock_exit; goto free_exit;
} }
ret = tce_iommu_take_ownership_ddw(container, table_group); ret = tce_iommu_take_ownership_ddw(container, table_group);
if (!tce_groups_attached(container) && !container->tables[0]) if (!tce_groups_attached(container) && !container->tables[0])
...@@ -1311,10 +1311,11 @@ static int tce_iommu_attach_group(void *iommu_data, ...@@ -1311,10 +1311,11 @@ static int tce_iommu_attach_group(void *iommu_data,
list_add(&tcegrp->next, &container->group_list); list_add(&tcegrp->next, &container->group_list);
} }
unlock_exit: free_exit:
if (ret && tcegrp) if (ret && tcegrp)
kfree(tcegrp); kfree(tcegrp);
unlock_exit:
mutex_unlock(&container->lock); mutex_unlock(&container->lock);
return ret; return ret;
......
...@@ -62,6 +62,7 @@ MODULE_PARM_DESC(dma_entry_limit, ...@@ -62,6 +62,7 @@ MODULE_PARM_DESC(dma_entry_limit,
struct vfio_iommu { struct vfio_iommu {
struct list_head domain_list; struct list_head domain_list;
struct list_head iova_list;
struct vfio_domain *external_domain; /* domain for external user */ struct vfio_domain *external_domain; /* domain for external user */
struct mutex lock; struct mutex lock;
struct rb_root dma_list; struct rb_root dma_list;
...@@ -97,6 +98,12 @@ struct vfio_group { ...@@ -97,6 +98,12 @@ struct vfio_group {
bool mdev_group; /* An mdev group */ bool mdev_group; /* An mdev group */
}; };
/*
 * One contiguous range of IOVA space that DMA map requests are allowed
 * to fall into. Ranges are chained together on a list such as
 * vfio_iommu.iova_list.
 */
struct vfio_iova {
struct list_head list; /* link in the owning iova list */
dma_addr_t start; /* first address of the range */
dma_addr_t end; /* last address of the range (inclusive) */
};
/* /*
* Guest RAM pinning working set or DMA target * Guest RAM pinning working set or DMA target
*/ */
...@@ -1038,6 +1045,27 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma, ...@@ -1038,6 +1045,27 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
return ret; return ret;
} }
/*
 * Decide whether a DMA map request covering [start, end] (both inclusive)
 * is acceptable for this container.
 *
 * Returns true when the request lies entirely inside one entry of
 * iommu->iova_list, or when the list has no entries at all. Returns false
 * when the list is populated but no single entry contains the request.
 */
static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu,
				      dma_addr_t start, dma_addr_t end)
{
	struct list_head *ranges = &iommu->iova_list;
	struct vfio_iova *range;

	/*
	 * An empty list places no restriction on the request: a container
	 * with a single mdev device will have an empty list.
	 */
	if (list_empty(ranges))
		return true;

	list_for_each_entry(range, ranges, list) {
		/* Request sticks out on either side of this range */
		if (start < range->start || end > range->end)
			continue;

		return true;
	}

	return false;
}
static int vfio_dma_do_map(struct vfio_iommu *iommu, static int vfio_dma_do_map(struct vfio_iommu *iommu,
struct vfio_iommu_type1_dma_map *map) struct vfio_iommu_type1_dma_map *map)
{ {
...@@ -1081,6 +1109,11 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, ...@@ -1081,6 +1109,11 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
goto out_unlock; goto out_unlock;
} }
if (!vfio_iommu_iova_dma_valid(iommu, iova, iova + size - 1)) {
ret = -EINVAL;
goto out_unlock;
}
dma = kzalloc(sizeof(*dma), GFP_KERNEL); dma = kzalloc(sizeof(*dma), GFP_KERNEL);
if (!dma) { if (!dma) {
ret = -ENOMEM; ret = -ENOMEM;
...@@ -1270,15 +1303,13 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain, ...@@ -1270,15 +1303,13 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain,
return NULL; return NULL;
} }
static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base) static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
phys_addr_t *base)
{ {
struct list_head group_resv_regions; struct iommu_resv_region *region;
struct iommu_resv_region *region, *next;
bool ret = false; bool ret = false;
INIT_LIST_HEAD(&group_resv_regions); list_for_each_entry(region, group_resv_regions, list) {
iommu_get_group_resv_regions(group, &group_resv_regions);
list_for_each_entry(region, &group_resv_regions, list) {
/* /*
* The presence of any 'real' MSI regions should take * The presence of any 'real' MSI regions should take
* precedence over the software-managed one if the * precedence over the software-managed one if the
...@@ -1294,8 +1325,7 @@ static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base) ...@@ -1294,8 +1325,7 @@ static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base)
ret = true; ret = true;
} }
} }
list_for_each_entry_safe(region, next, &group_resv_regions, list)
kfree(region);
return ret; return ret;
} }
...@@ -1395,6 +1425,228 @@ static int vfio_mdev_iommu_device(struct device *dev, void *data) ...@@ -1395,6 +1425,228 @@ static int vfio_mdev_iommu_device(struct device *dev, void *data)
return 0; return 0;
} }
/*
 * Helper that allocates a vfio_iova entry describing [start, end] and
 * links it immediately before @head (i.e. at the tail when @head is the
 * list head itself).
 *
 * The iova list is initially created with a single entry corresponding to
 * the IOMMU domain geometry to which the device group is attached. The
 * aperture is then narrowed when a new domain is added to the container,
 * provided the new aperture doesn't conflict with the current one or with
 * any existing dma mappings, and reserved regions associated with the
 * device group are carved out of it.
 *
 * Returns 0 on success or -ENOMEM if the entry cannot be allocated.
 */
static int vfio_iommu_iova_insert(struct list_head *head,
				  dma_addr_t start, dma_addr_t end)
{
	struct vfio_iova *range = kmalloc(sizeof(*range), GFP_KERNEL);

	if (!range)
		return -ENOMEM;

	range->start = start;
	range->end = end;
	INIT_LIST_HEAD(&range->list);
	list_add_tail(&range->list, head);

	return 0;
}
/*
 * Report whether a candidate aperture [start, end] conflicts with the
 * container's current iova list or with DMA mappings that already exist.
 *
 * Returns false when the iova list is empty. Otherwise returns true when:
 *  - the candidate is completely disjoint from the span covered by the
 *    first and last list entries, or
 *  - raising the lower bound would cut off an existing mapping, or
 *  - lowering the upper bound would cut off an existing mapping.
 */
static bool vfio_iommu_aper_conflict(struct vfio_iommu *iommu,
				     dma_addr_t start, dma_addr_t end)
{
	struct list_head *ranges = &iommu->iova_list;
	struct vfio_iova *lowest, *highest;

	if (list_empty(ranges))
		return false;

	lowest = list_first_entry(ranges, struct vfio_iova, list);
	highest = list_last_entry(ranges, struct vfio_iova, list);

	/* No overlap at all between the old and the new window */
	if (end < lowest->start || start > highest->end)
		return true;

	/* Anything mapped in the part that would be trimmed off the bottom? */
	if (start > lowest->start &&
	    vfio_find_dma(iommu, lowest->start, start - lowest->start))
		return true;

	/* Anything mapped in the part that would be trimmed off the top? */
	if (end < highest->end &&
	    vfio_find_dma(iommu, end + 1, highest->end - end))
		return true;

	return false;
}
/*
 * Shrink the iova list so that it fits inside the aperture [start, end].
 * Callers are expected to have verified beforehand that the new aperture
 * does not conflict with the existing aperture or with dma mappings.
 *
 * An empty list is simply populated with a single [start, end] entry.
 * Otherwise entries are walked twice: once to fix up the lower bound and
 * once to fix up the upper bound. An entry is trimmed only when the new
 * bound falls strictly inside it (start < node->end, end > node->start);
 * entries lying wholly outside the new bound are unlinked and freed.
 *
 * Returns 0, or the error from vfio_iommu_iova_insert() in the
 * empty-list case.
 */
static int vfio_iommu_aper_resize(struct list_head *iova,
				  dma_addr_t start, dma_addr_t end)
{
	struct vfio_iova *cur, *tmp;

	if (list_empty(iova))
		return vfio_iommu_iova_insert(iova, start, end);

	/* Pass 1: pull the lower bound up to @start */
	list_for_each_entry_safe(cur, tmp, iova, list) {
		/* This entry already begins above the new start: done */
		if (start < cur->start)
			break;

		/* Here start >= cur->start; trim if start is inside it */
		if (start < cur->end) {
			cur->start = start;
			break;
		}

		/* Entry sits entirely below the new start */
		list_del(&cur->list);
		kfree(cur);
	}

	/* Pass 2: push the upper bound down to @end */
	list_for_each_entry_safe(cur, tmp, iova, list) {
		/* Entry ends below the new end: keep untouched */
		if (end > cur->end)
			continue;

		/* Here end <= cur->end; trim if end is inside it */
		if (end > cur->start) {
			cur->end = end;
			continue;
		}

		/* Entry sits entirely above the new end */
		list_del(&cur->list);
		kfree(cur);
	}

	return 0;
}
/*
 * Report whether any reserved region in @resv_regions collides with a DMA
 * mapping that already exists in the container.
 *
 * Regions of type IOMMU_RESV_DIRECT_RELAXABLE are ignored. Returns true on
 * the first region for which vfio_find_dma() finds a mapping, false if
 * none does.
 */
static bool vfio_iommu_resv_conflict(struct vfio_iommu *iommu,
				     struct list_head *resv_regions)
{
	struct iommu_resv_region *resv;

	list_for_each_entry(resv, resv_regions, list) {
		if (resv->type != IOMMU_RESV_DIRECT_RELAXABLE &&
		    vfio_find_dma(iommu, resv->start, resv->length))
			return true;
	}

	return false;
}
/*
 * Punch every reserved region in @resv_regions out of the iova list @iova.
 * Regions of type IOMMU_RESV_DIRECT_RELAXABLE are skipped.
 *
 * Returns 0 on success, -ENOMEM (propagated from vfio_iommu_iova_insert())
 * if a replacement entry cannot be allocated, or -EINVAL if nothing is
 * left in the list once all regions have been excluded. On an allocation
 * failure the list is left partially updated.
 */
static int vfio_iommu_resv_exclude(struct list_head *iova,
				   struct list_head *resv_regions)
{
	struct iommu_resv_region *resv;
	struct vfio_iova *cur, *tmp;
	int ret;

	list_for_each_entry(resv, resv_regions, list) {
		phys_addr_t start, end;

		if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
			continue;

		start = resv->start;
		end = resv->start + resv->length - 1;

		list_for_each_entry_safe(cur, tmp, iova, list) {
			/* Reserved region does not touch this entry */
			if (end < cur->start || start > cur->end)
				continue;

			/*
			 * The entry overlaps the reserved region, so it is
			 * replaced by whatever remains of it on either side
			 * in order to exclude the region from the valid iova
			 * range. Replacement entries are linked in front of
			 * the current one, which is then dropped, so the list
			 * stays up to date and sorted.
			 */
			if (start > cur->start) {
				ret = vfio_iommu_iova_insert(&cur->list,
							     cur->start,
							     start - 1);
				if (ret)
					return ret;
			}

			if (end < cur->end) {
				ret = vfio_iommu_iova_insert(&cur->list,
							     end + 1,
							     cur->end);
				if (ret)
					return ret;
			}

			list_del(&cur->list);
			kfree(cur);
		}
	}

	return list_empty(iova) ? -EINVAL : 0;
}
static void vfio_iommu_resv_free(struct list_head *resv_regions)
{
struct iommu_resv_region *n, *next;
list_for_each_entry_safe(n, next, resv_regions, list) {
list_del(&n->list);
kfree(n);
}
}
static void vfio_iommu_iova_free(struct list_head *iova)
{
struct vfio_iova *n, *next;
list_for_each_entry_safe(n, next, iova, list) {
list_del(&n->list);
kfree(n);
}
}
/*
 * Duplicate iommu->iova_list onto @iova_copy, one freshly allocated entry
 * per source entry and in the same order.
 *
 * Returns 0 on success. If an allocation fails, everything on @iova_copy
 * is freed and the error from vfio_iommu_iova_insert() is returned.
 */
static int vfio_iommu_iova_get_copy(struct vfio_iommu *iommu,
				    struct list_head *iova_copy)
{
	struct vfio_iova *src;

	list_for_each_entry(src, &iommu->iova_list, list) {
		int ret = vfio_iommu_iova_insert(iova_copy, src->start,
						 src->end);

		if (ret) {
			vfio_iommu_iova_free(iova_copy);
			return ret;
		}
	}

	return 0;
}
/*
 * Replace the container's iova list with the entries on @iova_copy: the
 * current entries are freed, then the copy is spliced onto the (now
 * empty) iommu->iova_list.
 *
 * NOTE(review): @iova_copy's head is not re-initialised here, so callers
 * presumably must not walk it again afterwards -- confirm.
 */
static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
					struct list_head *iova_copy)
{
	vfio_iommu_iova_free(&iommu->iova_list);
	list_splice_tail(iova_copy, &iommu->iova_list);
}
static int vfio_iommu_type1_attach_group(void *iommu_data, static int vfio_iommu_type1_attach_group(void *iommu_data,
struct iommu_group *iommu_group) struct iommu_group *iommu_group)
{ {
...@@ -1405,6 +1657,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, ...@@ -1405,6 +1657,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
int ret; int ret;
bool resv_msi, msi_remap; bool resv_msi, msi_remap;
phys_addr_t resv_msi_base; phys_addr_t resv_msi_base;
struct iommu_domain_geometry geo;
LIST_HEAD(iova_copy);
LIST_HEAD(group_resv_regions);
mutex_lock(&iommu->lock); mutex_lock(&iommu->lock);
...@@ -1481,7 +1736,43 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, ...@@ -1481,7 +1736,43 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
if (ret) if (ret)
goto out_domain; goto out_domain;
resv_msi = vfio_iommu_has_sw_msi(iommu_group, &resv_msi_base); /* Get aperture info */
iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo);
if (vfio_iommu_aper_conflict(iommu, geo.aperture_start,
geo.aperture_end)) {
ret = -EINVAL;
goto out_detach;
}
ret = iommu_get_group_resv_regions(iommu_group, &group_resv_regions);
if (ret)
goto out_detach;
if (vfio_iommu_resv_conflict(iommu, &group_resv_regions)) {
ret = -EINVAL;
goto out_detach;
}
/*
* We don't want to work on the original iova list as the list
* gets modified and in case of failure we have to retain the
* original list. Get a copy here.
*/
ret = vfio_iommu_iova_get_copy(iommu, &iova_copy);
if (ret)
goto out_detach;
ret = vfio_iommu_aper_resize(&iova_copy, geo.aperture_start,
geo.aperture_end);
if (ret)
goto out_detach;
ret = vfio_iommu_resv_exclude(&iova_copy, &group_resv_regions);
if (ret)
goto out_detach;
resv_msi = vfio_iommu_has_sw_msi(&group_resv_regions, &resv_msi_base);
INIT_LIST_HEAD(&domain->group_list); INIT_LIST_HEAD(&domain->group_list);
list_add(&group->next, &domain->group_list); list_add(&group->next, &domain->group_list);
...@@ -1514,8 +1805,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, ...@@ -1514,8 +1805,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
list_add(&group->next, &d->group_list); list_add(&group->next, &d->group_list);
iommu_domain_free(domain->domain); iommu_domain_free(domain->domain);
kfree(domain); kfree(domain);
mutex_unlock(&iommu->lock); goto done;
return 0;
} }
ret = vfio_iommu_attach_group(domain, group); ret = vfio_iommu_attach_group(domain, group);
...@@ -1538,8 +1828,11 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, ...@@ -1538,8 +1828,11 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
} }
list_add(&domain->next, &iommu->domain_list); list_add(&domain->next, &iommu->domain_list);
done:
/* Delete the old one and insert new iova list */
vfio_iommu_iova_insert_copy(iommu, &iova_copy);
mutex_unlock(&iommu->lock); mutex_unlock(&iommu->lock);
vfio_iommu_resv_free(&group_resv_regions);
return 0; return 0;
...@@ -1547,6 +1840,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, ...@@ -1547,6 +1840,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
vfio_iommu_detach_group(domain, group); vfio_iommu_detach_group(domain, group);
out_domain: out_domain:
iommu_domain_free(domain->domain); iommu_domain_free(domain->domain);
vfio_iommu_iova_free(&iova_copy);
vfio_iommu_resv_free(&group_resv_regions);
out_free: out_free:
kfree(domain); kfree(domain);
kfree(group); kfree(group);
...@@ -1602,12 +1897,93 @@ static void vfio_sanity_check_pfn_list(struct vfio_iommu *iommu) ...@@ -1602,12 +1897,93 @@ static void vfio_sanity_check_pfn_list(struct vfio_iommu *iommu)
WARN_ON(iommu->notifier.head); WARN_ON(iommu->notifier.head);
} }
/*
 * Called when a domain is removed in detach. The removed domain may have
 * been the one limiting the iova aperture, so recompute the outer bounds
 * from the domains still on iommu->domain_list: the highest aperture
 * start and the lowest aperture end among them. With no domains left the
 * bounds are 0 and (dma_addr_t)~0.
 *
 * The result is written to the start of the first entry and the end of
 * the last entry of @iova_copy. Nothing is done if @iova_copy is empty.
 */
static void vfio_iommu_aper_expand(struct vfio_iommu *iommu,
				   struct list_head *iova_copy)
{
	struct iommu_domain_geometry geo;
	struct vfio_domain *domain;
	struct vfio_iova *first, *last;
	dma_addr_t lo = 0;
	dma_addr_t hi = (dma_addr_t)~0;

	if (list_empty(iova_copy))
		return;

	list_for_each_entry(domain, &iommu->domain_list, next) {
		iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY,
				      &geo);
		if (lo < geo.aperture_start)
			lo = geo.aperture_start;
		if (hi > geo.aperture_end)
			hi = geo.aperture_end;
	}

	/* Modify aperture limits. The new aper is either same or bigger */
	first = list_first_entry(iova_copy, struct vfio_iova, list);
	last = list_last_entry(iova_copy, struct vfio_iova, list);
	first->start = lo;
	last->end = hi;
}
/*
 * Called when a group is detached. The reserved regions of that group may
 * now be valid iova again, but since reserved regions can be duplicated
 * among groups the valid-range list is rebuilt from scratch instead of
 * being patched:
 *
 *  1. gather the reserved regions of every group on every remaining domain;
 *  2. remember the overall bounds of @iova_copy (start of its first entry,
 *     end of its last entry) and discard its entries;
 *  3. recreate a list spanning those bounds and exclude the gathered
 *     reserved regions from it.
 *
 * Returns 0 on success, -EINVAL if @iova_copy is empty on entry, or the
 * error from whichever step failed. The temporary reserved-region list is
 * always freed before returning.
 */
static int vfio_iommu_resv_refresh(struct vfio_iommu *iommu,
				   struct list_head *iova_copy)
{
	struct vfio_domain *domain;
	struct vfio_group *group;
	dma_addr_t lo, hi;
	LIST_HEAD(resv_regions);
	int ret;

	if (list_empty(iova_copy))
		return -EINVAL;

	list_for_each_entry(domain, &iommu->domain_list, next) {
		list_for_each_entry(group, &domain->group_list, next) {
			ret = iommu_get_group_resv_regions(group->iommu_group,
							   &resv_regions);
			if (ret)
				goto out_free_resv;
		}
	}

	lo = list_first_entry(iova_copy, struct vfio_iova, list)->start;
	hi = list_last_entry(iova_copy, struct vfio_iova, list)->end;

	/* purge the iova list and create new one */
	vfio_iommu_iova_free(iova_copy);

	ret = vfio_iommu_aper_resize(iova_copy, lo, hi);
	if (!ret) {
		/* Exclude current reserved regions from iova ranges */
		ret = vfio_iommu_resv_exclude(iova_copy, &resv_regions);
	}

out_free_resv:
	vfio_iommu_resv_free(&resv_regions);
	return ret;
}
static void vfio_iommu_type1_detach_group(void *iommu_data, static void vfio_iommu_type1_detach_group(void *iommu_data,
struct iommu_group *iommu_group) struct iommu_group *iommu_group)
{ {
struct vfio_iommu *iommu = iommu_data; struct vfio_iommu *iommu = iommu_data;
struct vfio_domain *domain; struct vfio_domain *domain;
struct vfio_group *group; struct vfio_group *group;
LIST_HEAD(iova_copy);
mutex_lock(&iommu->lock); mutex_lock(&iommu->lock);
...@@ -1630,6 +2006,13 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, ...@@ -1630,6 +2006,13 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
} }
} }
/*
* Get a copy of iova list. This will be used to update
* and to replace the current one later. Please note that
* we will leave the original list as it is if update fails.
*/
vfio_iommu_iova_get_copy(iommu, &iova_copy);
list_for_each_entry(domain, &iommu->domain_list, next) { list_for_each_entry(domain, &iommu->domain_list, next) {
group = find_iommu_group(domain, iommu_group); group = find_iommu_group(domain, iommu_group);
if (!group) if (!group)
...@@ -1655,10 +2038,16 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, ...@@ -1655,10 +2038,16 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
iommu_domain_free(domain->domain); iommu_domain_free(domain->domain);
list_del(&domain->next); list_del(&domain->next);
kfree(domain); kfree(domain);
vfio_iommu_aper_expand(iommu, &iova_copy);
} }
break; break;
} }
if (!vfio_iommu_resv_refresh(iommu, &iova_copy))
vfio_iommu_iova_insert_copy(iommu, &iova_copy);
else
vfio_iommu_iova_free(&iova_copy);
detach_group_done: detach_group_done:
mutex_unlock(&iommu->lock); mutex_unlock(&iommu->lock);
} }
...@@ -1686,6 +2075,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) ...@@ -1686,6 +2075,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
} }
INIT_LIST_HEAD(&iommu->domain_list); INIT_LIST_HEAD(&iommu->domain_list);
INIT_LIST_HEAD(&iommu->iova_list);
iommu->dma_list = RB_ROOT; iommu->dma_list = RB_ROOT;
iommu->dma_avail = dma_entry_limit; iommu->dma_avail = dma_entry_limit;
mutex_init(&iommu->lock); mutex_init(&iommu->lock);
...@@ -1729,6 +2119,9 @@ static void vfio_iommu_type1_release(void *iommu_data) ...@@ -1729,6 +2119,9 @@ static void vfio_iommu_type1_release(void *iommu_data)
list_del(&domain->next); list_del(&domain->next);
kfree(domain); kfree(domain);
} }
vfio_iommu_iova_free(&iommu->iova_list);
kfree(iommu); kfree(iommu);
} }
...@@ -1749,6 +2142,73 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) ...@@ -1749,6 +2142,73 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
return ret; return ret;
} }
/*
 * Append an IOVA-range capability (id VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE,
 * version 1) of @size bytes to @caps and fill it from @cap_iovas: the
 * range count plus the array of ranges.
 *
 * Returns 0 on success or the error encoded in the pointer returned by
 * vfio_info_cap_add().
 */
static int vfio_iommu_iova_add_cap(struct vfio_info_cap *caps,
		 struct vfio_iommu_type1_info_cap_iova_range *cap_iovas,
		 size_t size)
{
	struct vfio_iommu_type1_info_cap_iova_range *dst;
	struct vfio_info_cap_header *hdr;

	hdr = vfio_info_cap_add(caps, size,
				VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, 1);
	if (IS_ERR(hdr))
		return PTR_ERR(hdr);

	/* The header is embedded in the capability; step out to the whole */
	dst = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range,
			   header);

	dst->nr_iovas = cap_iovas->nr_iovas;
	memcpy(dst->iova_ranges, cap_iovas->iova_ranges,
	       cap_iovas->nr_iovas * sizeof(*cap_iovas->iova_ranges));

	return 0;
}
static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu,
struct vfio_info_cap *caps)
{
struct vfio_iommu_type1_info_cap_iova_range *cap_iovas;
struct vfio_iova *iova;
size_t size;
int iovas = 0, i = 0, ret;
mutex_lock(&iommu->lock);
list_for_each_entry(iova, &iommu->iova_list, list)
iovas++;
if (!iovas) {
/*
* Return 0 as a container with a single mdev device
* will have an empty list
*/
ret = 0;
goto out_unlock;
}
size = sizeof(*cap_iovas) + (iovas * sizeof(*cap_iovas->iova_ranges));
cap_iovas = kzalloc(size, GFP_KERNEL);
if (!cap_iovas) {
ret = -ENOMEM;
goto out_unlock;
}
cap_iovas->nr_iovas = iovas;
list_for_each_entry(iova, &iommu->iova_list, list) {
cap_iovas->iova_ranges[i].start = iova->start;
cap_iovas->iova_ranges[i].end = iova->end;
i++;
}
ret = vfio_iommu_iova_add_cap(caps, cap_iovas, size);
kfree(cap_iovas);
out_unlock:
mutex_unlock(&iommu->lock);
return ret;
}
static long vfio_iommu_type1_ioctl(void *iommu_data, static long vfio_iommu_type1_ioctl(void *iommu_data,
unsigned int cmd, unsigned long arg) unsigned int cmd, unsigned long arg)
{ {
...@@ -1770,19 +2230,53 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, ...@@ -1770,19 +2230,53 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
} }
} else if (cmd == VFIO_IOMMU_GET_INFO) { } else if (cmd == VFIO_IOMMU_GET_INFO) {
struct vfio_iommu_type1_info info; struct vfio_iommu_type1_info info;
struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
unsigned long capsz;
int ret;
minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
/* For backward compatibility, cannot require this */
capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
if (copy_from_user(&info, (void __user *)arg, minsz)) if (copy_from_user(&info, (void __user *)arg, minsz))
return -EFAULT; return -EFAULT;
if (info.argsz < minsz) if (info.argsz < minsz)
return -EINVAL; return -EINVAL;
if (info.argsz >= capsz) {
minsz = capsz;
info.cap_offset = 0; /* output, no-recopy necessary */
}
info.flags = VFIO_IOMMU_INFO_PGSIZES; info.flags = VFIO_IOMMU_INFO_PGSIZES;
info.iova_pgsizes = vfio_pgsize_bitmap(iommu); info.iova_pgsizes = vfio_pgsize_bitmap(iommu);
ret = vfio_iommu_iova_build_caps(iommu, &caps);
if (ret)
return ret;
if (caps.size) {
info.flags |= VFIO_IOMMU_INFO_CAPS;
if (info.argsz < sizeof(info) + caps.size) {
info.argsz = sizeof(info) + caps.size;
} else {
vfio_info_cap_shift(&caps, sizeof(info));
if (copy_to_user((void __user *)arg +
sizeof(info), caps.buf,
caps.size)) {
kfree(caps.buf);
return -EFAULT;
}
info.cap_offset = sizeof(info);
}
kfree(caps.buf);
}
return copy_to_user((void __user *)arg, &info, minsz) ? return copy_to_user((void __user *)arg, &info, minsz) ?
-EFAULT : 0; -EFAULT : 0;
......
...@@ -295,15 +295,38 @@ struct vfio_region_info_cap_type { ...@@ -295,15 +295,38 @@ struct vfio_region_info_cap_type {
__u32 subtype; /* type specific */ __u32 subtype; /* type specific */
}; };
/*
* List of region types, global per bus driver.
* If you introduce a new type, please add it here.
*/
/* PCI region type containing a PCI vendor part */
#define VFIO_REGION_TYPE_PCI_VENDOR_TYPE (1 << 31) #define VFIO_REGION_TYPE_PCI_VENDOR_TYPE (1 << 31)
#define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff)
#define VFIO_REGION_TYPE_GFX (1)
#define VFIO_REGION_TYPE_CCW (2)
/* sub-types for VFIO_REGION_TYPE_PCI_* */
/* 8086 Vendor sub-types */ /* 8086 vendor PCI sub-types */
#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1) #define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2) #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3) #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
#define VFIO_REGION_TYPE_GFX (1) /* 10de vendor PCI sub-types */
/*
* NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
*/
#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1)
/* 1014 vendor PCI sub-types */
/*
* IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
* to do TLB invalidation on a GPU.
*/
#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1)
/* sub-types for VFIO_REGION_TYPE_GFX */
#define VFIO_REGION_SUBTYPE_GFX_EDID (1) #define VFIO_REGION_SUBTYPE_GFX_EDID (1)
/** /**
...@@ -353,25 +376,9 @@ struct vfio_region_gfx_edid { ...@@ -353,25 +376,9 @@ struct vfio_region_gfx_edid {
#define VFIO_DEVICE_GFX_LINK_STATE_DOWN 2 #define VFIO_DEVICE_GFX_LINK_STATE_DOWN 2
}; };
#define VFIO_REGION_TYPE_CCW (2) /* sub-types for VFIO_REGION_TYPE_CCW */
/* ccw sub-types */
#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1) #define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1)
/*
* 10de vendor sub-type
*
* NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
*/
#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1)
/*
* 1014 vendor sub-type
*
* IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
* to do TLB invalidation on a GPU.
*/
#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1)
/* /*
* The MSIX mappable capability informs that MSIX data of a BAR can be mmapped * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
* which allows direct access to non-MSIX registers which happened to be within * which allows direct access to non-MSIX registers which happened to be within
...@@ -714,7 +721,31 @@ struct vfio_iommu_type1_info { ...@@ -714,7 +721,31 @@ struct vfio_iommu_type1_info {
__u32 argsz; __u32 argsz;
__u32 flags; __u32 flags;
#define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */ #define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */
#define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */
__u64 iova_pgsizes; /* Bitmap of supported page sizes */ __u64 iova_pgsizes; /* Bitmap of supported page sizes */
__u32 cap_offset; /* Offset within info struct of first cap */
};
/*
* The IOVA capability allows to report the valid IOVA range(s)
* excluding any non-relaxable reserved regions exposed by
* devices attached to the container. Any DMA map attempt
* outside the valid iova range will return error.
*
* The structures below define version 1 of this capability.
*/
#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE 1
/*
 * One valid IOVA range as reported to userspace inside the
 * VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE capability.
 */
struct vfio_iova_range {
__u64 start; /* first address of the range */
__u64 end; /* last address of the range */
};
struct vfio_iommu_type1_info_cap_iova_range {
struct vfio_info_cap_header header;
__u32 nr_iovas;
__u32 reserved;
struct vfio_iova_range iova_ranges[];
}; };
#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
......
...@@ -152,20 +152,9 @@ static const struct file_operations vd_fops = { ...@@ -152,20 +152,9 @@ static const struct file_operations vd_fops = {
/* function prototypes */ /* function prototypes */
static int mtty_trigger_interrupt(const guid_t *uuid); static int mtty_trigger_interrupt(struct mdev_state *mdev_state);
/* Helper functions */ /* Helper functions */
static struct mdev_state *find_mdev_state_by_uuid(const guid_t *uuid)
{
struct mdev_state *mds;
list_for_each_entry(mds, &mdev_devices_list, next) {
if (guid_equal(mdev_uuid(mds->mdev), uuid))
return mds;
}
return NULL;
}
static void dump_buffer(u8 *buf, uint32_t count) static void dump_buffer(u8 *buf, uint32_t count)
{ {
...@@ -337,8 +326,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, ...@@ -337,8 +326,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
pr_err("Serial port %d: Fifo level trigger\n", pr_err("Serial port %d: Fifo level trigger\n",
index); index);
#endif #endif
mtty_trigger_interrupt( mtty_trigger_interrupt(mdev_state);
mdev_uuid(mdev_state->mdev));
} }
} else { } else {
#if defined(DEBUG_INTR) #if defined(DEBUG_INTR)
...@@ -352,8 +340,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, ...@@ -352,8 +340,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
*/ */
if (mdev_state->s[index].uart_reg[UART_IER] & if (mdev_state->s[index].uart_reg[UART_IER] &
UART_IER_RLSI) UART_IER_RLSI)
mtty_trigger_interrupt( mtty_trigger_interrupt(mdev_state);
mdev_uuid(mdev_state->mdev));
} }
mutex_unlock(&mdev_state->rxtx_lock); mutex_unlock(&mdev_state->rxtx_lock);
break; break;
...@@ -372,8 +359,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, ...@@ -372,8 +359,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
pr_err("Serial port %d: IER_THRI write\n", pr_err("Serial port %d: IER_THRI write\n",
index); index);
#endif #endif
mtty_trigger_interrupt( mtty_trigger_interrupt(mdev_state);
mdev_uuid(mdev_state->mdev));
} }
mutex_unlock(&mdev_state->rxtx_lock); mutex_unlock(&mdev_state->rxtx_lock);
...@@ -444,7 +430,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, ...@@ -444,7 +430,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
#if defined(DEBUG_INTR) #if defined(DEBUG_INTR)
pr_err("Serial port %d: MCR_OUT2 write\n", index); pr_err("Serial port %d: MCR_OUT2 write\n", index);
#endif #endif
mtty_trigger_interrupt(mdev_uuid(mdev_state->mdev)); mtty_trigger_interrupt(mdev_state);
} }
if ((mdev_state->s[index].uart_reg[UART_IER] & UART_IER_MSI) && if ((mdev_state->s[index].uart_reg[UART_IER] & UART_IER_MSI) &&
...@@ -452,7 +438,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, ...@@ -452,7 +438,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
#if defined(DEBUG_INTR) #if defined(DEBUG_INTR)
pr_err("Serial port %d: MCR RTS/DTR write\n", index); pr_err("Serial port %d: MCR RTS/DTR write\n", index);
#endif #endif
mtty_trigger_interrupt(mdev_uuid(mdev_state->mdev)); mtty_trigger_interrupt(mdev_state);
} }
break; break;
...@@ -503,8 +489,7 @@ static void handle_bar_read(unsigned int index, struct mdev_state *mdev_state, ...@@ -503,8 +489,7 @@ static void handle_bar_read(unsigned int index, struct mdev_state *mdev_state,
#endif #endif
if (mdev_state->s[index].uart_reg[UART_IER] & if (mdev_state->s[index].uart_reg[UART_IER] &
UART_IER_THRI) UART_IER_THRI)
mtty_trigger_interrupt( mtty_trigger_interrupt(mdev_state);
mdev_uuid(mdev_state->mdev));
} }
mutex_unlock(&mdev_state->rxtx_lock); mutex_unlock(&mdev_state->rxtx_lock);
...@@ -1028,17 +1013,9 @@ static int mtty_set_irqs(struct mdev_device *mdev, uint32_t flags, ...@@ -1028,17 +1013,9 @@ static int mtty_set_irqs(struct mdev_device *mdev, uint32_t flags,
return ret; return ret;
} }
static int mtty_trigger_interrupt(const guid_t *uuid) static int mtty_trigger_interrupt(struct mdev_state *mdev_state)
{ {
int ret = -1; int ret = -1;
struct mdev_state *mdev_state;
mdev_state = find_mdev_state_by_uuid(uuid);
if (!mdev_state) {
pr_info("%s: mdev not found\n", __func__);
return -EINVAL;
}
if ((mdev_state->irq_index == VFIO_PCI_MSI_IRQ_INDEX) && if ((mdev_state->irq_index == VFIO_PCI_MSI_IRQ_INDEX) &&
(!mdev_state->msi_evtfd)) (!mdev_state->msi_evtfd))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment