Commit 7eac66d0 authored by Linus Torvalds

Merge tag 'vfio-v5.9-rc2' of git://github.com/awilliam/linux-vfio

Pull VFIO fixes from Alex Williamson:

 - Fix lockdep issue reported for recursive read-lock (Alex Williamson)

 - Fix missing unwind in type1 replay function (Alex Williamson)

* tag 'vfio-v5.9-rc2' of git://github.com/awilliam/linux-vfio:
  vfio/type1: Add proper error unwind for vfio_iommu_replay()
  vfio-pci: Avoid recursive read-lock usage
parents 33d0f96f aae7a75a
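
For context on the read-lock fix below: vfio-pci could take vdev->memory_lock for read while already holding it for read, and with a writer-fair lock (like the kernel's rwsem) a writer queued between the two acquisitions deadlocks both sides, which is what lockdep flagged. The following is a minimal userspace sketch of that hazard using POSIX rwlocks, not kernel code; all names are illustrative, and glibc's writer-preferring lock kind is selected explicitly so the nested attempt actually blocks.

#define _GNU_SOURCE
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_rwlock_t lock;

static void *writer(void *arg)
{
        (void)arg;
        pthread_rwlock_wrlock(&lock);   /* queues behind the outer read lock */
        pthread_rwlock_unlock(&lock);
        return NULL;
}

int main(void)
{
        pthread_rwlockattr_t attr;
        pthread_t w;

        /* Writer-preferring, like a fair rwsem: queued writers block new
         * readers, so recursive read-locking can deadlock. */
        pthread_rwlockattr_init(&attr);
        pthread_rwlockattr_setkind_np(&attr,
                        PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
        pthread_rwlock_init(&lock, &attr);

        pthread_rwlock_rdlock(&lock);           /* outer read lock */
        pthread_create(&w, NULL, writer, NULL);
        sleep(1);                               /* let the writer queue */

        /* A blocking rdlock here would wait on the queued writer, which in
         * turn waits on our outer read lock: deadlock. tryrdlock reports the
         * condition instead of hanging the demo. */
        if (pthread_rwlock_tryrdlock(&lock) != 0)
                printf("nested read lock would deadlock behind queued writer\n");
        else
                pthread_rwlock_unlock(&lock);

        pthread_rwlock_unlock(&lock);
        pthread_join(w, NULL);
        return 0;
}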
@@ -33,12 +33,14 @@
 struct vfio_pci_ioeventfd {
        struct list_head        next;
+       struct vfio_pci_device  *vdev;
        struct virqfd           *virqfd;
        void __iomem            *addr;
        uint64_t                data;
        loff_t                  pos;
        int                     bar;
        int                     count;
+       bool                    test_mem;
 };

 struct vfio_pci_irq_ctx {
...
@@ -37,17 +37,70 @@
 #define vfio_ioread8   ioread8
 #define vfio_iowrite8  iowrite8

+#define VFIO_IOWRITE(size) \
+static int vfio_pci_iowrite##size(struct vfio_pci_device *vdev,        \
+                       bool test_mem, u##size val, void __iomem *io)   \
+{                                                                      \
+       if (test_mem) {                                                 \
+               down_read(&vdev->memory_lock);                          \
+               if (!__vfio_pci_memory_enabled(vdev)) {                 \
+                       up_read(&vdev->memory_lock);                    \
+                       return -EIO;                                    \
+               }                                                       \
+       }                                                               \
+                                                                       \
+       vfio_iowrite##size(val, io);                                    \
+                                                                       \
+       if (test_mem)                                                   \
+               up_read(&vdev->memory_lock);                            \
+                                                                       \
+       return 0;                                                       \
+}
+
+VFIO_IOWRITE(8)
+VFIO_IOWRITE(16)
+VFIO_IOWRITE(32)
+#ifdef iowrite64
+VFIO_IOWRITE(64)
+#endif
+
+#define VFIO_IOREAD(size) \
+static int vfio_pci_ioread##size(struct vfio_pci_device *vdev,         \
+                       bool test_mem, u##size *val, void __iomem *io)  \
+{                                                                      \
+       if (test_mem) {                                                 \
+               down_read(&vdev->memory_lock);                          \
+               if (!__vfio_pci_memory_enabled(vdev)) {                 \
+                       up_read(&vdev->memory_lock);                    \
+                       return -EIO;                                    \
+               }                                                       \
+       }                                                               \
+                                                                       \
+       *val = vfio_ioread##size(io);                                   \
+                                                                       \
+       if (test_mem)                                                   \
+               up_read(&vdev->memory_lock);                            \
+                                                                       \
+       return 0;                                                       \
+}
+
+VFIO_IOREAD(8)
+VFIO_IOREAD(16)
+VFIO_IOREAD(32)
+
 /*
  * Read or write from an __iomem region (MMIO or I/O port) with an excluded
  * range which is inaccessible.  The excluded range drops writes and fills
  * reads with -1.  This is intended for handling MSI-X vector tables and
  * leftover space for ROM BARs.
  */
-static ssize_t do_io_rw(void __iomem *io, char __user *buf,
+static ssize_t do_io_rw(struct vfio_pci_device *vdev, bool test_mem,
+                       void __iomem *io, char __user *buf,
                        loff_t off, size_t count, size_t x_start,
                        size_t x_end, bool iswrite)
 {
        ssize_t done = 0;
+       int ret;

        while (count) {
                size_t fillable, filled;
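
The VFIO_IOWRITE/VFIO_IOREAD macros above stamp out one checked accessor per access width via token pasting. As a hedged illustration only, here is a self-contained userspace analogue of the same pattern; the names (fake_dev, checked_writeN, mem_enabled) are stand-ins invented for this sketch, with the mem_enabled test standing in for __vfio_pci_memory_enabled() under memory_lock.

#include <stdint.h>
#include <stdio.h>

typedef uint8_t  u8;
typedef uint16_t u16;
typedef uint32_t u32;

/* stand-in for the device and its memory-enable state */
struct fake_dev { int mem_enabled; };

/* one macro generates a checked accessor per access width */
#define DEFINE_CHECKED_WRITE(size)                                      \
static int checked_write##size(struct fake_dev *d, int test_mem,       \
                               u##size val, u##size *io)               \
{                                                                      \
        if (test_mem && !d->mem_enabled)                               \
                return -1;      /* the kernel code returns -EIO */     \
        *io = val;                                                     \
        return 0;                                                      \
}

DEFINE_CHECKED_WRITE(8)
DEFINE_CHECKED_WRITE(16)
DEFINE_CHECKED_WRITE(32)

int main(void)
{
        struct fake_dev d = { .mem_enabled = 0 };
        u32 reg = 0;

        /* write fails while memory decoding is disabled */
        printf("write while disabled: %d\n",
               checked_write32(&d, 1, 0xdead, &reg));
        d.mem_enabled = 1;
        printf("write while enabled:  %d\n",
               checked_write32(&d, 1, 0xbeef, &reg));
        printf("reg = 0x%x\n", reg);
        return 0;
}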
@@ -66,9 +119,15 @@ static ssize_t do_io_rw(void __iomem *io, char __user *buf,
                        if (copy_from_user(&val, buf, 4))
                                return -EFAULT;

-                       vfio_iowrite32(val, io + off);
+                       ret = vfio_pci_iowrite32(vdev, test_mem,
+                                                val, io + off);
+                       if (ret)
+                               return ret;
                } else {
-                       val = vfio_ioread32(io + off);
+                       ret = vfio_pci_ioread32(vdev, test_mem,
+                                               &val, io + off);
+                       if (ret)
+                               return ret;

                        if (copy_to_user(buf, &val, 4))
                                return -EFAULT;
@@ -82,9 +141,15 @@ static ssize_t do_io_rw(void __iomem *io, char __user *buf,
                        if (copy_from_user(&val, buf, 2))
                                return -EFAULT;

-                       vfio_iowrite16(val, io + off);
+                       ret = vfio_pci_iowrite16(vdev, test_mem,
+                                                val, io + off);
+                       if (ret)
+                               return ret;
                } else {
-                       val = vfio_ioread16(io + off);
+                       ret = vfio_pci_ioread16(vdev, test_mem,
+                                               &val, io + off);
+                       if (ret)
+                               return ret;

                        if (copy_to_user(buf, &val, 2))
                                return -EFAULT;
@@ -98,9 +163,15 @@ static ssize_t do_io_rw(void __iomem *io, char __user *buf,
                        if (copy_from_user(&val, buf, 1))
                                return -EFAULT;

-                       vfio_iowrite8(val, io + off);
+                       ret = vfio_pci_iowrite8(vdev, test_mem,
+                                               val, io + off);
+                       if (ret)
+                               return ret;
                } else {
-                       val = vfio_ioread8(io + off);
+                       ret = vfio_pci_ioread8(vdev, test_mem,
+                                              &val, io + off);
+                       if (ret)
+                               return ret;

                        if (copy_to_user(buf, &val, 1))
                                return -EFAULT;
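
The three hunks above are the same mechanical substitution at each access width: do_io_rw() picks a 4-, 2-, or 1-byte access based on alignment and remaining count, and each branch now goes through a checked helper. A minimal userspace sketch of just that chunking loop follows (the excluded-range fill logic of the real function is omitted; everything here is invented for the demo):

#include <stdio.h>
#include <stddef.h>

int main(void)
{
        size_t off = 1, count = 11;

        /* widest access permitted by alignment and remaining count */
        while (count) {
                size_t filled;

                if (count >= 4 && !(off % 4))
                        filled = 4;     /* 32-bit access */
                else if (count >= 2 && !(off % 2))
                        filled = 2;     /* 16-bit access */
                else
                        filled = 1;     /* 8-bit access */

                printf("access %zu byte(s) at offset %zu\n", filled, off);
                off += filled;
                count -= filled;
        }
        return 0;
}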
@@ -178,14 +249,6 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,

        count = min(count, (size_t)(end - pos));

-       if (res->flags & IORESOURCE_MEM) {
-               down_read(&vdev->memory_lock);
-               if (!__vfio_pci_memory_enabled(vdev)) {
-                       up_read(&vdev->memory_lock);
-                       return -EIO;
-               }
-       }
-
        if (bar == PCI_ROM_RESOURCE) {
                /*
                 * The ROM can fill less space than the BAR, so we start the
@@ -213,7 +276,8 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
                x_end = vdev->msix_offset + vdev->msix_size;
        }

-       done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite);
+       done = do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
+                       count, x_start, x_end, iswrite);

        if (done >= 0)
                *ppos += done;
@@ -221,9 +285,6 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
        if (bar == PCI_ROM_RESOURCE)
                pci_unmap_rom(pdev, io);
 out:
-       if (res->flags & IORESOURCE_MEM)
-               up_read(&vdev->memory_lock);
-
        return done;
 }
@@ -278,7 +339,12 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
                return ret;
        }

-       done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite);
+       /*
+        * VGA MMIO is a legacy, non-BAR resource that hopefully allows
+        * probing, so we don't currently worry about access in relation
+        * to the memory enable bit in the command register.
+        */
+       done = do_io_rw(vdev, false, iomem, buf, off, count, 0, 0, iswrite);

        vga_put(vdev->pdev, rsrc);
@@ -296,17 +362,21 @@ static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
        switch (ioeventfd->count) {
        case 1:
-               vfio_iowrite8(ioeventfd->data, ioeventfd->addr);
+               vfio_pci_iowrite8(ioeventfd->vdev, ioeventfd->test_mem,
+                                 ioeventfd->data, ioeventfd->addr);
                break;
        case 2:
-               vfio_iowrite16(ioeventfd->data, ioeventfd->addr);
+               vfio_pci_iowrite16(ioeventfd->vdev, ioeventfd->test_mem,
+                                  ioeventfd->data, ioeventfd->addr);
                break;
        case 4:
-               vfio_iowrite32(ioeventfd->data, ioeventfd->addr);
+               vfio_pci_iowrite32(ioeventfd->vdev, ioeventfd->test_mem,
+                                  ioeventfd->data, ioeventfd->addr);
                break;
 #ifdef iowrite64
        case 8:
-               vfio_iowrite64(ioeventfd->data, ioeventfd->addr);
+               vfio_pci_iowrite64(ioeventfd->vdev, ioeventfd->test_mem,
+                                  ioeventfd->data, ioeventfd->addr);
                break;
 #endif
        }
@@ -378,11 +448,13 @@ long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
                goto out_unlock;
        }

+       ioeventfd->vdev = vdev;
        ioeventfd->addr = vdev->barmap[bar] + pos;
        ioeventfd->data = data;
        ioeventfd->pos = pos;
        ioeventfd->bar = bar;
        ioeventfd->count = count;
+       ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;

        ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
                                 NULL, NULL, &ioeventfd->virqfd, fd);
...
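
The ioeventfd path is why the memory-enable test had to move into the accessors: the programmed write happens later, from virqfd context, not from a read/write syscall that could take the lock at entry, so test_mem is recorded at setup from the BAR's resource flags. As a rough userspace sketch of the trigger-then-write shape (hypothetical names; the kernel side uses virqfd rather than a blocking read):

#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/eventfd.h>

int main(void)
{
        int efd = eventfd(0, 0);
        uint64_t one = 1, ticks;
        uint32_t fake_reg = 0;
        const uint32_t programmed_data = 0x1234; /* registered up front */

        if (efd < 0)
                return 1;

        /* guest/user side: kick the eventfd */
        if (write(efd, &one, sizeof(one)) != sizeof(one))
                return 1;

        /* handler side: consume the signal, perform the programmed write;
         * in the kernel this is where vfio_pci_iowrite32() now also checks
         * the memory-enable state via test_mem */
        if (read(efd, &ticks, sizeof(ticks)) == sizeof(ticks))
                fake_reg = programmed_data;

        printf("fake_reg = 0x%x after %llu kick(s)\n",
               fake_reg, (unsigned long long)ticks);
        close(efd);
        return 0;
}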
@@ -1424,13 +1424,16 @@ static int vfio_bus_type(struct device *dev, void *data)
 static int vfio_iommu_replay(struct vfio_iommu *iommu,
                             struct vfio_domain *domain)
 {
-       struct vfio_domain *d;
+       struct vfio_domain *d = NULL;
        struct rb_node *n;
        unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        int ret;

        /* Arbitrarily pick the first domain in the list for lookups */
-       d = list_first_entry(&iommu->domain_list, struct vfio_domain, next);
+       if (!list_empty(&iommu->domain_list))
+               d = list_first_entry(&iommu->domain_list,
+                                    struct vfio_domain, next);
+
        n = rb_first(&iommu->dma_list);

        for (; n; n = rb_next(n)) {
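
The d = NULL change matters because list_first_entry() on an empty list does not return NULL; it returns the list head itself rebased by offsetof(), which is not a valid entry. A minimal userspace demonstration with the macros inlined (struct names invented for the demo; dereferencing the result would be undefined behavior, so it only prints the pointer):

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };
struct domain { int id; struct list_head next; };

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))
#define list_first_entry(head, type, member) \
        container_of((head)->next, type, member)

int main(void)
{
        struct list_head head = { &head, &head }; /* empty list */

        /* the "first entry" of an empty list is the head pointer rebased
         * by offsetof(): a bogus non-entry pointer, which is why the fix
         * checks list_empty() and leaves d == NULL instead */
        struct domain *d = list_first_entry(&head, struct domain, next);

        printf("bogus entry pointer: %p (list head at %p)\n",
               (void *)d, (void *)&head);
        return 0;
}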
@@ -1448,6 +1451,11 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
                        phys_addr_t p;
                        dma_addr_t i;

+                       if (WARN_ON(!d)) { /* mapped w/o a domain?! */
+                               ret = -EINVAL;
+                               goto unwind;
+                       }
+
                        phys = iommu_iova_to_phys(d->domain, iova);

                        if (WARN_ON(!phys)) {
@@ -1477,7 +1485,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
                        if (npage <= 0) {
                                WARN_ON(!npage);
                                ret = (int)npage;
-                               return ret;
+                               goto unwind;
                        }

                        phys = pfn << PAGE_SHIFT;
@@ -1486,14 +1494,67 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
                        ret = iommu_map(domain->domain, iova, phys,
                                        size, dma->prot | domain->prot);
-                       if (ret)
-                               return ret;
+                       if (ret) {
+                               if (!dma->iommu_mapped)
+                                       vfio_unpin_pages_remote(dma, iova,
+                                                       phys >> PAGE_SHIFT,
+                                                       size >> PAGE_SHIFT,
+                                                       true);
+                               goto unwind;
+                       }

                        iova += size;
                }
+       }
+
+       /* All dmas are now mapped, defer to second tree walk for unwind */
+       for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
+               struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);

                dma->iommu_mapped = true;
        }
+
        return 0;
+
+unwind:
+       for (; n; n = rb_prev(n)) {
+               struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
+               dma_addr_t iova;
+
+               if (dma->iommu_mapped) {
+                       iommu_unmap(domain->domain, dma->iova, dma->size);
+                       continue;
+               }
+
+               iova = dma->iova;
+               while (iova < dma->iova + dma->size) {
+                       phys_addr_t phys, p;
+                       size_t size;
+                       dma_addr_t i;
+
+                       phys = iommu_iova_to_phys(domain->domain, iova);
+                       if (!phys) {
+                               iova += PAGE_SIZE;
+                               continue;
+                       }
+
+                       size = PAGE_SIZE;
+                       p = phys + size;
+                       i = iova + size;
+                       while (i < dma->iova + dma->size &&
+                              p == iommu_iova_to_phys(domain->domain, i)) {
+                               size += PAGE_SIZE;
+                               p += PAGE_SIZE;
+                               i += PAGE_SIZE;
+                       }
+
+                       iommu_unmap(domain->domain, iova, size);
+                       vfio_unpin_pages_remote(dma, iova, phys >> PAGE_SHIFT,
+                                               size >> PAGE_SHIFT, true);
+               }
+       }
+
+       return ret;
 }

 /*
...
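
The unwind walk above coalesces physically contiguous pages so each iommu_unmap()/unpin call covers the largest run it can, mirroring how the mapping side batches. A standalone sketch of just that coalescing loop over a fake page table (all names invented for the demo; the real code consults iommu_iova_to_phys() and the table changes under it as ranges are unmapped):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096UL
#define NPAGES 8

static uint64_t pt[NPAGES]; /* iova page index -> phys addr, 0 = unmapped */

static uint64_t iova_to_phys(uint64_t iova)
{
        return pt[iova / PAGE_SIZE];
}

int main(void)
{
        /* two physically contiguous runs with a hole at page 3 */
        for (int i = 0; i < 3; i++)
                pt[i] = 0x100000 + i * PAGE_SIZE;
        for (int i = 4; i < 8; i++)
                pt[i] = 0x900000 + (i - 4) * PAGE_SIZE;

        uint64_t iova = 0, end = NPAGES * PAGE_SIZE;
        while (iova < end) {
                uint64_t phys = iova_to_phys(iova);

                if (!phys) {            /* hole: skip one page */
                        iova += PAGE_SIZE;
                        continue;
                }

                /* extend [iova, iova + size) while phys stays contiguous */
                uint64_t size = PAGE_SIZE, p = phys + size, i = iova + size;
                while (i < end && iova_to_phys(i) == p) {
                        size += PAGE_SIZE;
                        p += PAGE_SIZE;
                        i += PAGE_SIZE;
                }

                printf("unmap iova 0x%llx size 0x%llx (phys 0x%llx)\n",
                       (unsigned long long)iova, (unsigned long long)size,
                       (unsigned long long)phys);
                iova += size;
        }
        return 0;
}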