Commit 7a84428a authored by Alex Williamson, committed by Avi Kivity

KVM: Add resampling irqfds for level triggered interrupts

To emulate level triggered interrupts, add a resample option to
KVM_IRQFD.  When specified, a new resamplefd is provided that notifies
the user when the irqchip has been resampled by the VM.  This may, for
instance, indicate an EOI.  Also in this mode, posting of an interrupt
through an irqfd only asserts the interrupt.  On resampling, the
interrupt is automatically de-asserted prior to user notification.
This enables level triggered interrupts to be posted and re-enabled
from vfio with no userspace intervention.

All resampling irqfds can make use of a single irq source ID, so we
reserve a new one for this interface.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
parent 1e08ec4a
...@@ -1950,6 +1950,19 @@ the guest using the specified gsi pin. The irqfd is removed using ...@@ -1950,6 +1950,19 @@ the guest using the specified gsi pin. The irqfd is removed using
the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd
and kvm_irqfd.gsi. and kvm_irqfd.gsi.
With KVM_CAP_IRQFD_RESAMPLE, KVM_IRQFD supports a de-assert and notify
mechanism allowing emulation of level-triggered, irqfd-based
interrupts. When KVM_IRQFD_FLAG_RESAMPLE is set the user must pass an
additional eventfd in the kvm_irqfd.resamplefd field. When operating
in resample mode, posting of an interrupt through kvm_irqfd.fd asserts
the specified gsi in the irqchip. When the irqchip is resampled, such
as from an EOI, the gsi is de-asserted and the user is notified via
kvm_irqfd.resamplefd. It is the user's responsibility to re-queue
the interrupt if the device making use of it still requires service.
Note that closing the resamplefd is not sufficient to disable the
irqfd. The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment
and need not be specified with KVM_IRQFD_FLAG_DEASSIGN.
4.76 KVM_PPC_ALLOCATE_HTAB 4.76 KVM_PPC_ALLOCATE_HTAB
Capability: KVM_CAP_PPC_ALLOC_HTAB Capability: KVM_CAP_PPC_ALLOC_HTAB
......
...@@ -2176,6 +2176,7 @@ int kvm_dev_ioctl_check_extension(long ext) ...@@ -2176,6 +2176,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PCI_2_3: case KVM_CAP_PCI_2_3:
case KVM_CAP_KVMCLOCK_CTRL: case KVM_CAP_KVMCLOCK_CTRL:
case KVM_CAP_READONLY_MEM: case KVM_CAP_READONLY_MEM:
case KVM_CAP_IRQFD_RESAMPLE:
r = 1; r = 1;
break; break;
case KVM_CAP_COALESCED_MMIO: case KVM_CAP_COALESCED_MMIO:
...@@ -6268,6 +6269,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) ...@@ -6268,6 +6269,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
/* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */
set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
&kvm->arch.irq_sources_bitmap);
raw_spin_lock_init(&kvm->arch.tsc_write_lock); raw_spin_lock_init(&kvm->arch.tsc_write_lock);
mutex_init(&kvm->arch.apic_map_lock); mutex_init(&kvm->arch.apic_map_lock);
......
...@@ -625,6 +625,7 @@ struct kvm_ppc_smmu_info { ...@@ -625,6 +625,7 @@ struct kvm_ppc_smmu_info {
#ifdef __KVM_HAVE_READONLY_MEM #ifdef __KVM_HAVE_READONLY_MEM
#define KVM_CAP_READONLY_MEM 81 #define KVM_CAP_READONLY_MEM 81
#endif #endif
#define KVM_CAP_IRQFD_RESAMPLE 82
#ifdef KVM_CAP_IRQ_ROUTING #ifdef KVM_CAP_IRQ_ROUTING
...@@ -690,12 +691,21 @@ struct kvm_xen_hvm_config { ...@@ -690,12 +691,21 @@ struct kvm_xen_hvm_config {
#endif #endif
#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
/*
* Available with KVM_CAP_IRQFD_RESAMPLE
*
* KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
* the irqfd to operate in resampling mode for level triggered interrupt
* emulation. See Documentation/virtual/kvm/api.txt.
*/
#define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)
struct kvm_irqfd { struct kvm_irqfd {
__u32 fd; __u32 fd;
__u32 gsi; __u32 gsi;
__u32 flags; __u32 flags;
__u8 pad[20]; __u32 resamplefd;
__u8 pad[16];
}; };
struct kvm_clock_data { struct kvm_clock_data {
......
...@@ -119,7 +119,8 @@ static inline bool is_error_page(struct page *page) ...@@ -119,7 +119,8 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQ_PMU 16 #define KVM_REQ_PMU 16
#define KVM_REQ_PMI 17 #define KVM_REQ_PMI 17
#define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_USERSPACE_IRQ_SOURCE_ID 0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
struct kvm; struct kvm;
struct kvm_vcpu; struct kvm_vcpu;
...@@ -343,6 +344,8 @@ struct kvm { ...@@ -343,6 +344,8 @@ struct kvm {
struct { struct {
spinlock_t lock; spinlock_t lock;
struct list_head items; struct list_head items;
struct list_head resampler_list;
struct mutex resampler_lock;
} irqfds; } irqfds;
struct list_head ioeventfds; struct list_head ioeventfds;
#endif #endif
......
...@@ -43,6 +43,31 @@ ...@@ -43,6 +43,31 @@
* -------------------------------------------------------------------- * --------------------------------------------------------------------
*/ */
/*
* Resampling irqfds are a special variety of irqfds used to emulate
* level triggered interrupts. The interrupt is asserted on eventfd
* trigger. On acknowledgement through the irq ack notifier, the
* interrupt is de-asserted and userspace is notified through the
* resamplefd. All resamplers on the same gsi are de-asserted
* together, so we don't need to track the state of each individual
* user. We can also therefore share the same irq source ID.
*/
/*
 * Per-gsi resampler state shared by every resampling irqfd on that gsi.
 * Allocated by kvm_irqfd_assign() when the first resampling irqfd for a
 * gsi is set up and freed by irqfd_resampler_shutdown() once its irqfd
 * list becomes empty.
 */
struct _irqfd_resampler {
/* VM this resampler belongs to */
struct kvm *kvm;
/*
 * Head of the list of resampling struct _irqfd objects sharing this
 * gsi (entries are linked through _irqfd.resampler_link).
 * RCU list modified under kvm->irqfds.resampler_lock
 */
struct list_head list;
/* Ack notifier for the gsi; its irq_acked hook is irqfd_resampler_ack() */
struct kvm_irq_ack_notifier notifier;
/*
 * Entry in the kvm->irqfds.resampler_list list. Used for sharing
 * resamplers among irqfds on the same gsi.
 * Accessed and modified under kvm->irqfds.resampler_lock
 */
struct list_head link;
};
struct _irqfd { struct _irqfd {
/* Used for MSI fast-path */ /* Used for MSI fast-path */
struct kvm *kvm; struct kvm *kvm;
...@@ -52,6 +77,12 @@ struct _irqfd { ...@@ -52,6 +77,12 @@ struct _irqfd {
/* Used for level IRQ fast-path */ /* Used for level IRQ fast-path */
int gsi; int gsi;
struct work_struct inject; struct work_struct inject;
/* The resampler used by this irqfd (resampler-only) */
struct _irqfd_resampler *resampler;
/* Eventfd notified on resample (resampler-only) */
struct eventfd_ctx *resamplefd;
/* Entry in list of irqfds for a resampler (resampler-only) */
struct list_head resampler_link;
/* Used for setup/shutdown */ /* Used for setup/shutdown */
struct eventfd_ctx *eventfd; struct eventfd_ctx *eventfd;
struct list_head list; struct list_head list;
...@@ -67,8 +98,58 @@ irqfd_inject(struct work_struct *work) ...@@ -67,8 +98,58 @@ irqfd_inject(struct work_struct *work)
struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
struct kvm *kvm = irqfd->kvm; struct kvm *kvm = irqfd->kvm;
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); if (!irqfd->resampler) {
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
} else
kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
irqfd->gsi, 1);
}
/*
* Since resampler irqfds share an IRQ source ID, we de-assert once
* then notify all of the resampler irqfds using this GSI. We can't
* do multiple de-asserts or we risk racing with incoming re-asserts.
*/
static void
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
struct _irqfd_resampler *resampler;
struct _irqfd *irqfd;
resampler = container_of(kian, struct _irqfd_resampler, notifier);
kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
resampler->notifier.gsi, 0);
rcu_read_lock();
list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
eventfd_signal(irqfd->resamplefd, 1);
rcu_read_unlock();
}
static void
irqfd_resampler_shutdown(struct _irqfd *irqfd)
{
struct _irqfd_resampler *resampler = irqfd->resampler;
struct kvm *kvm = resampler->kvm;
mutex_lock(&kvm->irqfds.resampler_lock);
list_del_rcu(&irqfd->resampler_link);
synchronize_rcu();
if (list_empty(&resampler->list)) {
list_del(&resampler->link);
kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
resampler->notifier.gsi, 0);
kfree(resampler);
}
mutex_unlock(&kvm->irqfds.resampler_lock);
} }
/* /*
...@@ -92,6 +173,11 @@ irqfd_shutdown(struct work_struct *work) ...@@ -92,6 +173,11 @@ irqfd_shutdown(struct work_struct *work)
*/ */
flush_work_sync(&irqfd->inject); flush_work_sync(&irqfd->inject);
if (irqfd->resampler) {
irqfd_resampler_shutdown(irqfd);
eventfd_ctx_put(irqfd->resamplefd);
}
/* /*
* It is now safe to release the object's resources * It is now safe to release the object's resources
*/ */
...@@ -203,7 +289,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) ...@@ -203,7 +289,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
struct kvm_irq_routing_table *irq_rt; struct kvm_irq_routing_table *irq_rt;
struct _irqfd *irqfd, *tmp; struct _irqfd *irqfd, *tmp;
struct file *file = NULL; struct file *file = NULL;
struct eventfd_ctx *eventfd = NULL; struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
int ret; int ret;
unsigned int events; unsigned int events;
...@@ -231,6 +317,54 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) ...@@ -231,6 +317,54 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
irqfd->eventfd = eventfd; irqfd->eventfd = eventfd;
/*
 * Resample (level-triggered) mode: take a reference on the user's
 * resamplefd and attach this irqfd to the resampler for its gsi,
 * creating that resampler if this is the first irqfd on the gsi.
 * On any failure we jump to the common 'fail' label, which drops the
 * resamplefd reference and detaches from the resampler if attached.
 */
if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
	struct _irqfd_resampler *resampler;

	resamplefd = eventfd_ctx_fdget(args->resamplefd);
	if (IS_ERR(resamplefd)) {
		ret = PTR_ERR(resamplefd);
		goto fail;
	}

	irqfd->resamplefd = resamplefd;
	INIT_LIST_HEAD(&irqfd->resampler_link);

	mutex_lock(&kvm->irqfds.resampler_lock);

	/*
	 * Reuse an existing resampler for this gsi if there is one.
	 * Resamplers are chained on kvm->irqfds.resampler_list through
	 * their 'link' member.  'list' is the head of each resampler's
	 * own irqfd list; iterating with it would make container_of()
	 * yield bogus resampler pointers.
	 */
	list_for_each_entry(resampler,
			    &kvm->irqfds.resampler_list, link) {
		if (resampler->notifier.gsi == irqfd->gsi) {
			irqfd->resampler = resampler;
			break;
		}
	}

	if (!irqfd->resampler) {
		resampler = kzalloc(sizeof(*resampler), GFP_KERNEL);
		if (!resampler) {
			ret = -ENOMEM;
			mutex_unlock(&kvm->irqfds.resampler_lock);
			goto fail;
		}

		resampler->kvm = kvm;
		INIT_LIST_HEAD(&resampler->list);
		resampler->notifier.gsi = irqfd->gsi;
		resampler->notifier.irq_acked = irqfd_resampler_ack;
		INIT_LIST_HEAD(&resampler->link);

		list_add(&resampler->link, &kvm->irqfds.resampler_list);
		kvm_register_irq_ack_notifier(kvm,
					      &resampler->notifier);
		irqfd->resampler = resampler;
	}

	/* Publish this irqfd on the resampler's RCU-protected irqfd list. */
	list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
	synchronize_rcu();

	mutex_unlock(&kvm->irqfds.resampler_lock);
}
/* /*
* Install our own custom wake-up handling so we are notified via * Install our own custom wake-up handling so we are notified via
* a callback whenever someone signals the underlying eventfd * a callback whenever someone signals the underlying eventfd
...@@ -276,6 +410,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) ...@@ -276,6 +410,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
return 0; return 0;
fail: fail:
if (irqfd->resampler)
irqfd_resampler_shutdown(irqfd);
if (resamplefd && !IS_ERR(resamplefd))
eventfd_ctx_put(resamplefd);
if (eventfd && !IS_ERR(eventfd)) if (eventfd && !IS_ERR(eventfd))
eventfd_ctx_put(eventfd); eventfd_ctx_put(eventfd);
...@@ -291,6 +431,8 @@ kvm_eventfd_init(struct kvm *kvm) ...@@ -291,6 +431,8 @@ kvm_eventfd_init(struct kvm *kvm)
{ {
spin_lock_init(&kvm->irqfds.lock); spin_lock_init(&kvm->irqfds.lock);
INIT_LIST_HEAD(&kvm->irqfds.items); INIT_LIST_HEAD(&kvm->irqfds.items);
INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
mutex_init(&kvm->irqfds.resampler_lock);
INIT_LIST_HEAD(&kvm->ioeventfds); INIT_LIST_HEAD(&kvm->ioeventfds);
} }
...@@ -340,7 +482,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) ...@@ -340,7 +482,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
int int
kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{ {
if (args->flags & ~KVM_IRQFD_FLAG_DEASSIGN) if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
return -EINVAL; return -EINVAL;
if (args->flags & KVM_IRQFD_FLAG_DEASSIGN) if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
......
...@@ -228,6 +228,9 @@ int kvm_request_irq_source_id(struct kvm *kvm) ...@@ -228,6 +228,9 @@ int kvm_request_irq_source_id(struct kvm *kvm)
} }
ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
#ifdef CONFIG_X86
ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
#endif
set_bit(irq_source_id, bitmap); set_bit(irq_source_id, bitmap);
unlock: unlock:
mutex_unlock(&kvm->irq_lock); mutex_unlock(&kvm->irq_lock);
...@@ -238,6 +241,9 @@ int kvm_request_irq_source_id(struct kvm *kvm) ...@@ -238,6 +241,9 @@ int kvm_request_irq_source_id(struct kvm *kvm)
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
{ {
ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
#ifdef CONFIG_X86
ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
#endif
mutex_lock(&kvm->irq_lock); mutex_lock(&kvm->irq_lock);
if (irq_source_id < 0 || if (irq_source_id < 0 ||
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment