Commit 18c198c9 authored by Alex Williamson's avatar Alex Williamson

vfio/pci: Create persistent INTx handler

A vulnerability exists where the eventfd for INTx signaling can be
deconfigured, which unregisters the IRQ handler but still allows
eventfds to be signaled with a NULL context through the SET_IRQS ioctl
or through unmask irqfd if the device interrupt is pending.

Ideally this could be solved with some additional locking; the igate
mutex serializes the ioctl and config space accesses, and the interrupt
handler is unregistered relative to the trigger, but the irqfd path
runs asynchronous to those.  The igate mutex cannot be acquired from the
atomic context of the eventfd wake function.  Disabling the irqfd
relative to the eventfd registration is potentially incompatible with
existing userspace.

As a result, the solution implemented here moves configuration of the
INTx interrupt handler to track the lifetime of the INTx context object
and irq_type configuration, rather than registration of a particular
trigger eventfd.  Synchronization is added between the ioctl path and
eventfd_signal() wrapper such that the eventfd trigger can be
dynamically updated relative to in-flight interrupts or irqfd callbacks.

Cc:  <stable@vger.kernel.org>
Fixes: 89e1f7d4 ("vfio: Add PCI device driver")
Reported-by: default avatarReinette Chatre <reinette.chatre@intel.com>
Reviewed-by: default avatarKevin Tian <kevin.tian@intel.com>
Reviewed-by: default avatarReinette Chatre <reinette.chatre@intel.com>
Reviewed-by: default avatarEric Auger <eric.auger@redhat.com>
Link: https://lore.kernel.org/r/20240308230557.805580-5-alex.williamson@redhat.comSigned-off-by: default avatarAlex Williamson <alex.williamson@redhat.com>
parent b620ecbd
...@@ -90,11 +90,15 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused) ...@@ -90,11 +90,15 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused)
if (likely(is_intx(vdev) && !vdev->virq_disabled)) { if (likely(is_intx(vdev) && !vdev->virq_disabled)) {
struct vfio_pci_irq_ctx *ctx; struct vfio_pci_irq_ctx *ctx;
struct eventfd_ctx *trigger;
ctx = vfio_irq_ctx_get(vdev, 0); ctx = vfio_irq_ctx_get(vdev, 0);
if (WARN_ON_ONCE(!ctx)) if (WARN_ON_ONCE(!ctx))
return; return;
eventfd_signal(ctx->trigger);
trigger = READ_ONCE(ctx->trigger);
if (likely(trigger))
eventfd_signal(trigger);
} }
} }
...@@ -253,100 +257,100 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id) ...@@ -253,100 +257,100 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
return ret; return ret;
} }
static int vfio_intx_enable(struct vfio_pci_core_device *vdev) static int vfio_intx_enable(struct vfio_pci_core_device *vdev,
struct eventfd_ctx *trigger)
{ {
struct pci_dev *pdev = vdev->pdev;
struct vfio_pci_irq_ctx *ctx; struct vfio_pci_irq_ctx *ctx;
unsigned long irqflags;
char *name;
int ret;
if (!is_irq_none(vdev)) if (!is_irq_none(vdev))
return -EINVAL; return -EINVAL;
if (!vdev->pdev->irq) if (!pdev->irq)
return -ENODEV; return -ENODEV;
name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev));
if (!name)
return -ENOMEM;
ctx = vfio_irq_ctx_alloc(vdev, 0); ctx = vfio_irq_ctx_alloc(vdev, 0);
if (!ctx) if (!ctx)
return -ENOMEM; return -ENOMEM;
ctx->name = name;
ctx->trigger = trigger;
/* /*
* If the virtual interrupt is masked, restore it. Devices * Fill the initial masked state based on virq_disabled. After
* supporting DisINTx can be masked at the hardware level * enable, changing the DisINTx bit in vconfig directly changes INTx
* here, non-PCI-2.3 devices will have to wait until the * masking. igate prevents races during setup, once running masked
* interrupt is enabled. * is protected via irqlock.
*
* Devices supporting DisINTx also reflect the current mask state in
* the physical DisINTx bit, which is not affected during IRQ setup.
*
* Devices without DisINTx support require an exclusive interrupt.
* IRQ masking is performed at the IRQ chip. Again, igate protects
* against races during setup and IRQ handlers and irqfds are not
* yet active, therefore masked is stable and can be used to
* conditionally auto-enable the IRQ.
*
* irq_type must be stable while the IRQ handler is registered,
* therefore it must be set before request_irq().
*/ */
ctx->masked = vdev->virq_disabled; ctx->masked = vdev->virq_disabled;
if (vdev->pci_2_3) if (vdev->pci_2_3) {
pci_intx(vdev->pdev, !ctx->masked); pci_intx(pdev, !ctx->masked);
irqflags = IRQF_SHARED;
} else {
irqflags = ctx->masked ? IRQF_NO_AUTOEN : 0;
}
vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX; vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
ret = request_irq(pdev->irq, vfio_intx_handler,
irqflags, ctx->name, vdev);
if (ret) {
vdev->irq_type = VFIO_PCI_NUM_IRQS;
kfree(name);
vfio_irq_ctx_free(vdev, ctx, 0);
return ret;
}
return 0; return 0;
} }
static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev,
struct eventfd_ctx *trigger)
{ {
struct pci_dev *pdev = vdev->pdev; struct pci_dev *pdev = vdev->pdev;
unsigned long irqflags = IRQF_SHARED;
struct vfio_pci_irq_ctx *ctx; struct vfio_pci_irq_ctx *ctx;
struct eventfd_ctx *trigger; struct eventfd_ctx *old;
unsigned long flags;
int ret;
ctx = vfio_irq_ctx_get(vdev, 0); ctx = vfio_irq_ctx_get(vdev, 0);
if (WARN_ON_ONCE(!ctx)) if (WARN_ON_ONCE(!ctx))
return -EINVAL; return -EINVAL;
if (ctx->trigger) { old = ctx->trigger;
free_irq(pdev->irq, vdev);
kfree(ctx->name);
eventfd_ctx_put(ctx->trigger);
ctx->trigger = NULL;
}
if (fd < 0) /* Disable only */
return 0;
ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)",
pci_name(pdev));
if (!ctx->name)
return -ENOMEM;
trigger = eventfd_ctx_fdget(fd);
if (IS_ERR(trigger)) {
kfree(ctx->name);
return PTR_ERR(trigger);
}
ctx->trigger = trigger; WRITE_ONCE(ctx->trigger, trigger);
/* /* Releasing an old ctx requires synchronizing in-flight users */
* Devices without DisINTx support require an exclusive interrupt, if (old) {
* IRQ masking is performed at the IRQ chip. The masked status is synchronize_irq(pdev->irq);
* protected by vdev->irqlock. Setup the IRQ without auto-enable and vfio_virqfd_flush_thread(&ctx->unmask);
* unmask as necessary below under lock. DisINTx is unmodified by eventfd_ctx_put(old);
* the IRQ configuration and may therefore use auto-enable.
*/
if (!vdev->pci_2_3)
irqflags = IRQF_NO_AUTOEN;
ret = request_irq(pdev->irq, vfio_intx_handler,
irqflags, ctx->name, vdev);
if (ret) {
ctx->trigger = NULL;
kfree(ctx->name);
eventfd_ctx_put(trigger);
return ret;
} }
spin_lock_irqsave(&vdev->irqlock, flags);
if (!vdev->pci_2_3 && !ctx->masked)
enable_irq(pdev->irq);
spin_unlock_irqrestore(&vdev->irqlock, flags);
return 0; return 0;
} }
static void vfio_intx_disable(struct vfio_pci_core_device *vdev) static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
{ {
struct pci_dev *pdev = vdev->pdev;
struct vfio_pci_irq_ctx *ctx; struct vfio_pci_irq_ctx *ctx;
ctx = vfio_irq_ctx_get(vdev, 0); ctx = vfio_irq_ctx_get(vdev, 0);
...@@ -354,10 +358,13 @@ static void vfio_intx_disable(struct vfio_pci_core_device *vdev) ...@@ -354,10 +358,13 @@ static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
if (ctx) { if (ctx) {
vfio_virqfd_disable(&ctx->unmask); vfio_virqfd_disable(&ctx->unmask);
vfio_virqfd_disable(&ctx->mask); vfio_virqfd_disable(&ctx->mask);
free_irq(pdev->irq, vdev);
if (ctx->trigger)
eventfd_ctx_put(ctx->trigger);
kfree(ctx->name);
vfio_irq_ctx_free(vdev, ctx, 0);
} }
vfio_intx_set_signal(vdev, -1);
vdev->irq_type = VFIO_PCI_NUM_IRQS; vdev->irq_type = VFIO_PCI_NUM_IRQS;
vfio_irq_ctx_free(vdev, ctx, 0);
} }
/* /*
...@@ -641,19 +648,23 @@ static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev, ...@@ -641,19 +648,23 @@ static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev,
return -EINVAL; return -EINVAL;
if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
struct eventfd_ctx *trigger = NULL;
int32_t fd = *(int32_t *)data; int32_t fd = *(int32_t *)data;
int ret; int ret;
if (is_intx(vdev)) if (fd >= 0) {
return vfio_intx_set_signal(vdev, fd); trigger = eventfd_ctx_fdget(fd);
if (IS_ERR(trigger))
return PTR_ERR(trigger);
}
ret = vfio_intx_enable(vdev); if (is_intx(vdev))
if (ret) ret = vfio_intx_set_signal(vdev, trigger);
return ret; else
ret = vfio_intx_enable(vdev, trigger);
ret = vfio_intx_set_signal(vdev, fd); if (ret && trigger)
if (ret) eventfd_ctx_put(trigger);
vfio_intx_disable(vdev);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment