Commit 184564ef authored by Zhang Haoyu's avatar Zhang Haoyu Committed by Paolo Bonzini

kvm: ioapic: conditionally delay irq delivery duringeoi broadcast

Currently, we call ioapic_service() immediately when we find the irq is still
active during eoi broadcast. But for real hardware, there's some delay between
the EOI writing and irq delivery.  If we do not emulate this behavior, and
re-inject the interrupt immediately after the guest sends an EOI and re-enables
interrupts, a guest might spend all its time in the ISR if it has a broken
handler for a level-triggered interrupt.

Such livelock actually happens with Windows guests when resuming from
hibernation.

As there's no way to recognize the broken handle from new raised ones, this patch
delays an interrupt if 10.000 consecutive EOIs found that the interrupt was
still high.  The guest can then make a little forward progress, until a proper
IRQ handler is set or until some detection routine in the guest (such as
Linux's note_interrupt()) recognizes the situation.

Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: default avatarJason Wang <jasowang@redhat.com>
Signed-off-by: default avatarZhang Haoyu <zhanghy@sangfor.com>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent 105b21bb
...@@ -95,6 +95,26 @@ TRACE_EVENT(kvm_ioapic_set_irq, ...@@ -95,6 +95,26 @@ TRACE_EVENT(kvm_ioapic_set_irq,
__entry->coalesced ? " (coalesced)" : "") __entry->coalesced ? " (coalesced)" : "")
); );
TRACE_EVENT(kvm_ioapic_delayed_eoi_inj,
TP_PROTO(__u64 e),
TP_ARGS(e),
TP_STRUCT__entry(
__field( __u64, e )
),
TP_fast_assign(
__entry->e = e;
),
TP_printk("dst %x vec=%u (%s|%s|%s%s)",
(u8)(__entry->e >> 56), (u8)__entry->e,
__print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
(__entry->e & (1<<11)) ? "logical" : "physical",
(__entry->e & (1<<15)) ? "level" : "edge",
(__entry->e & (1<<16)) ? "|masked" : "")
);
TRACE_EVENT(kvm_msi_set_irq, TRACE_EVENT(kvm_msi_set_irq,
TP_PROTO(__u64 address, __u64 data), TP_PROTO(__u64 address, __u64 data),
TP_ARGS(address, data), TP_ARGS(address, data),
......
...@@ -405,6 +405,26 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id) ...@@ -405,6 +405,26 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id)
spin_unlock(&ioapic->lock); spin_unlock(&ioapic->lock);
} }
static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
{
int i;
struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic,
eoi_inject.work);
spin_lock(&ioapic->lock);
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG)
continue;
if (ioapic->irr & (1 << i) && !ent->fields.remote_irr)
ioapic_service(ioapic, i, false);
}
spin_unlock(&ioapic->lock);
}
#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000
static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
struct kvm_ioapic *ioapic, int vector, int trigger_mode) struct kvm_ioapic *ioapic, int vector, int trigger_mode)
{ {
...@@ -435,8 +455,26 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, ...@@ -435,8 +455,26 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
ent->fields.remote_irr = 0; ent->fields.remote_irr = 0;
if (ioapic->irr & (1 << i)) if (!ent->fields.mask && (ioapic->irr & (1 << i))) {
ioapic_service(ioapic, i, false); ++ioapic->irq_eoi[i];
if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) {
/*
* Real hardware does not deliver the interrupt
* immediately during eoi broadcast, and this
* lets a buggy guest make slow progress
* even if it does not correctly handle a
* level-triggered interrupt. Emulate this
* behavior if we detect an interrupt storm.
*/
schedule_delayed_work(&ioapic->eoi_inject, HZ / 100);
ioapic->irq_eoi[i] = 0;
trace_kvm_ioapic_delayed_eoi_inj(ent->bits);
} else {
ioapic_service(ioapic, i, false);
}
} else {
ioapic->irq_eoi[i] = 0;
}
} }
} }
...@@ -565,12 +603,14 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) ...@@ -565,12 +603,14 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
{ {
int i; int i;
cancel_delayed_work_sync(&ioapic->eoi_inject);
for (i = 0; i < IOAPIC_NUM_PINS; i++) for (i = 0; i < IOAPIC_NUM_PINS; i++)
ioapic->redirtbl[i].fields.mask = 1; ioapic->redirtbl[i].fields.mask = 1;
ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
ioapic->ioregsel = 0; ioapic->ioregsel = 0;
ioapic->irr = 0; ioapic->irr = 0;
ioapic->id = 0; ioapic->id = 0;
memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
rtc_irq_eoi_tracking_reset(ioapic); rtc_irq_eoi_tracking_reset(ioapic);
update_handled_vectors(ioapic); update_handled_vectors(ioapic);
} }
...@@ -589,6 +629,7 @@ int kvm_ioapic_init(struct kvm *kvm) ...@@ -589,6 +629,7 @@ int kvm_ioapic_init(struct kvm *kvm)
if (!ioapic) if (!ioapic)
return -ENOMEM; return -ENOMEM;
spin_lock_init(&ioapic->lock); spin_lock_init(&ioapic->lock);
INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work);
kvm->arch.vioapic = ioapic; kvm->arch.vioapic = ioapic;
kvm_ioapic_reset(ioapic); kvm_ioapic_reset(ioapic);
kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
...@@ -609,6 +650,7 @@ void kvm_ioapic_destroy(struct kvm *kvm) ...@@ -609,6 +650,7 @@ void kvm_ioapic_destroy(struct kvm *kvm)
{ {
struct kvm_ioapic *ioapic = kvm->arch.vioapic; struct kvm_ioapic *ioapic = kvm->arch.vioapic;
cancel_delayed_work_sync(&ioapic->eoi_inject);
if (ioapic) { if (ioapic) {
kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
kvm->arch.vioapic = NULL; kvm->arch.vioapic = NULL;
......
...@@ -59,6 +59,8 @@ struct kvm_ioapic { ...@@ -59,6 +59,8 @@ struct kvm_ioapic {
spinlock_t lock; spinlock_t lock;
DECLARE_BITMAP(handled_vectors, 256); DECLARE_BITMAP(handled_vectors, 256);
struct rtc_status rtc_status; struct rtc_status rtc_status;
struct delayed_work eoi_inject;
u32 irq_eoi[IOAPIC_NUM_PINS];
}; };
#ifdef DEBUG #ifdef DEBUG
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment