Commit ae7a2a3f authored by Michael S. Tsirkin's avatar Michael S. Tsirkin Committed by Avi Kivity

KVM: host side for eoi optimization

Implementation of PV EOI using shared memory.
This reduces the number of exits an interrupt
causes as much as by half.

The idea is simple: there's a bit, per APIC, in guest memory,
that tells the guest that it does not need EOI.
We set it before injecting an interrupt and clear
before injecting a nested one. Guest tests it using
a test and clear operation - this is necessary
so that host can detect interrupt nesting -
and if set, it can skip the EOI MSR.

There's a new MSR to set the address of said register
in guest memory. Otherwise not much changed:
- Guest EOI is not required
- Register is tested & ISR is automatically cleared on exit

For testing results see description of previous patch
'kvm_para: guest side for eoi avoidance'.
Signed-off-by: default avatarMichael S. Tsirkin <mst@redhat.com>
Signed-off-by: default avatarAvi Kivity <avi@redhat.com>
parent d905c069
...@@ -175,6 +175,13 @@ enum { ...@@ -175,6 +175,13 @@ enum {
/* apic attention bits */ /* apic attention bits */
#define KVM_APIC_CHECK_VAPIC 0 #define KVM_APIC_CHECK_VAPIC 0
/*
* The following bit is set with PV-EOI, unset on EOI.
* We detect PV-EOI changes by guest by comparing
* this bit with PV-EOI in guest memory.
* See the implementation in apic_update_pv_eoi.
*/
#define KVM_APIC_PV_EOI_PENDING 1
/* /*
* We don't want allocation failures within the mmu code, so we preallocate * We don't want allocation failures within the mmu code, so we preallocate
...@@ -484,6 +491,11 @@ struct kvm_vcpu_arch { ...@@ -484,6 +491,11 @@ struct kvm_vcpu_arch {
u64 length; u64 length;
u64 status; u64 status;
} osvw; } osvw;
struct {
u64 msr_val;
struct gfn_to_hva_cache data;
} pv_eoi;
}; };
struct kvm_lpage_info { struct kvm_lpage_info {
......
...@@ -409,6 +409,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, ...@@ -409,6 +409,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_NOP_IO_DELAY) | (1 << KVM_FEATURE_NOP_IO_DELAY) |
(1 << KVM_FEATURE_CLOCKSOURCE2) | (1 << KVM_FEATURE_CLOCKSOURCE2) |
(1 << KVM_FEATURE_ASYNC_PF) | (1 << KVM_FEATURE_ASYNC_PF) |
(1 << KVM_FEATURE_PV_EOI) |
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
if (sched_info_on()) if (sched_info_on())
......
...@@ -311,6 +311,54 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) ...@@ -311,6 +311,54 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
irq->level, irq->trig_mode); irq->level, irq->trig_mode);
} }
static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{
return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
sizeof(val));
}
static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
{
return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
sizeof(*val));
}
static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
{
return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}
static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
u8 val;
if (pv_eoi_get_user(vcpu, &val) < 0)
apic_debug("Can't read EOI MSR value: 0x%llx\n",
(unsigned long long)vcpi->arch.pv_eoi.msr_val);
return val & 0x1;
}
static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
apic_debug("Can't set EOI MSR value: 0x%llx\n",
(unsigned long long)vcpi->arch.pv_eoi.msr_val);
return;
}
__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}
static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
{
if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
apic_debug("Can't clear EOI MSR value: 0x%llx\n",
(unsigned long long)vcpi->arch.pv_eoi.msr_val);
return;
}
__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}
static inline int apic_find_highest_isr(struct kvm_lapic *apic) static inline int apic_find_highest_isr(struct kvm_lapic *apic)
{ {
int result; int result;
...@@ -527,15 +575,18 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) ...@@ -527,15 +575,18 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
} }
static void apic_set_eoi(struct kvm_lapic *apic) static int apic_set_eoi(struct kvm_lapic *apic)
{ {
int vector = apic_find_highest_isr(apic); int vector = apic_find_highest_isr(apic);
trace_kvm_eoi(apic, vector);
/* /*
* Not every write EOI will has corresponding ISR, * Not every write EOI will has corresponding ISR,
* one example is when Kernel check timer on setup_IO_APIC * one example is when Kernel check timer on setup_IO_APIC
*/ */
if (vector == -1) if (vector == -1)
return; return vector;
apic_clear_isr(vector, apic); apic_clear_isr(vector, apic);
apic_update_ppr(apic); apic_update_ppr(apic);
...@@ -550,6 +601,7 @@ static void apic_set_eoi(struct kvm_lapic *apic) ...@@ -550,6 +601,7 @@ static void apic_set_eoi(struct kvm_lapic *apic)
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
} }
kvm_make_request(KVM_REQ_EVENT, apic->vcpu); kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
return vector;
} }
static void apic_send_ipi(struct kvm_lapic *apic) static void apic_send_ipi(struct kvm_lapic *apic)
...@@ -1132,6 +1184,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) ...@@ -1132,6 +1184,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
atomic_set(&apic->lapic_timer.pending, 0); atomic_set(&apic->lapic_timer.pending, 0);
if (kvm_vcpu_is_bsp(vcpu)) if (kvm_vcpu_is_bsp(vcpu))
vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
vcpu->arch.pv_eoi.msr_val = 0;
apic_update_ppr(apic); apic_update_ppr(apic);
vcpu->arch.apic_arb_prio = 0; vcpu->arch.apic_arb_prio = 0;
...@@ -1332,11 +1385,51 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) ...@@ -1332,11 +1385,51 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
hrtimer_start_expires(timer, HRTIMER_MODE_ABS); hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
} }
/*
* apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
*
* Detect whether guest triggered PV EOI since the
* last entry. If yes, set EOI on guests's behalf.
* Clear PV EOI in guest memory in any case.
*/
static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
struct kvm_lapic *apic)
{
bool pending;
int vector;
/*
* PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
* and KVM_PV_EOI_ENABLED in guest memory as follows:
*
* KVM_APIC_PV_EOI_PENDING is unset:
* -> host disabled PV EOI.
* KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
* -> host enabled PV EOI, guest did not execute EOI yet.
* KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
* -> host enabled PV EOI, guest executed EOI.
*/
BUG_ON(!pv_eoi_enabled(vcpu));
pending = pv_eoi_get_pending(vcpu);
/*
* Clear pending bit in any case: it will be set again on vmentry.
* While this might not be ideal from performance point of view,
* this makes sure pv eoi is only enabled when we know it's safe.
*/
pv_eoi_clr_pending(vcpu);
if (pending)
return;
vector = apic_set_eoi(apic);
trace_kvm_pv_eoi(apic, vector);
}
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
{ {
u32 data; u32 data;
void *vapic; void *vapic;
if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
return; return;
...@@ -1347,17 +1440,44 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) ...@@ -1347,17 +1440,44 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
apic_set_tpr(vcpu->arch.apic, data & 0xff); apic_set_tpr(vcpu->arch.apic, data & 0xff);
} }
/*
* apic_sync_pv_eoi_to_guest - called before vmentry
*
* Detect whether it's safe to enable PV EOI and
* if yes do so.
*/
static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
struct kvm_lapic *apic)
{
if (!pv_eoi_enabled(vcpu) ||
/* IRR set or many bits in ISR: could be nested. */
apic->irr_pending ||
/* Cache not set: could be safe but we don't bother. */
apic->highest_isr_cache == -1 ||
/* Need EOI to update ioapic. */
kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
/*
* PV EOI was disabled by apic_sync_pv_eoi_from_guest
* so we need not do anything here.
*/
return;
}
pv_eoi_set_pending(apic->vcpu);
}
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
{ {
u32 data, tpr; u32 data, tpr;
int max_irr, max_isr; int max_irr, max_isr;
struct kvm_lapic *apic; struct kvm_lapic *apic = vcpu->arch.apic;
void *vapic; void *vapic;
apic_sync_pv_eoi_to_guest(vcpu, apic);
if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
return; return;
apic = vcpu->arch.apic;
tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff; tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff;
max_irr = apic_find_highest_irr(apic); max_irr = apic_find_highest_irr(apic);
if (max_irr < 0) if (max_irr < 0)
...@@ -1443,3 +1563,16 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) ...@@ -1443,3 +1563,16 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
return 0; return 0;
} }
int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
{
u64 addr = data & ~KVM_MSR_ENABLED;
if (!IS_ALIGNED(addr, 4))
return 1;
vcpu->arch.pv_eoi.msr_val = data;
if (!pv_eoi_enabled(vcpu))
return 0;
return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
addr);
}
...@@ -69,4 +69,6 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) ...@@ -69,4 +69,6 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
{ {
return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
} }
int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
#endif #endif
...@@ -517,6 +517,40 @@ TRACE_EVENT(kvm_apic_accept_irq, ...@@ -517,6 +517,40 @@ TRACE_EVENT(kvm_apic_accept_irq,
__entry->coalesced ? " (coalesced)" : "") __entry->coalesced ? " (coalesced)" : "")
); );
TRACE_EVENT(kvm_eoi,
TP_PROTO(struct kvm_lapic *apic, int vector),
TP_ARGS(apic, vector),
TP_STRUCT__entry(
__field( __u32, apicid )
__field( int, vector )
),
TP_fast_assign(
__entry->apicid = apic->vcpu->vcpu_id;
__entry->vector = vector;
),
TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector)
);
TRACE_EVENT(kvm_pv_eoi,
TP_PROTO(struct kvm_lapic *apic, int vector),
TP_ARGS(apic, vector),
TP_STRUCT__entry(
__field( __u32, apicid )
__field( int, vector )
),
TP_fast_assign(
__entry->apicid = apic->vcpu->vcpu_id;
__entry->vector = vector;
),
TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector)
);
/* /*
* Tracepoint for nested VMRUN * Tracepoint for nested VMRUN
*/ */
......
...@@ -795,6 +795,7 @@ static u32 msrs_to_save[] = { ...@@ -795,6 +795,7 @@ static u32 msrs_to_save[] = {
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
MSR_KVM_PV_EOI_EN,
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
MSR_STAR, MSR_STAR,
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
...@@ -1653,6 +1654,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) ...@@ -1653,6 +1654,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
break; break;
case MSR_KVM_PV_EOI_EN:
if (kvm_lapic_enable_pv_eoi(vcpu, data))
return 1;
break;
case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS: case MSR_IA32_MCG_STATUS:
...@@ -5394,6 +5399,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) ...@@ -5394,6 +5399,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
cancel_injection: cancel_injection:
kvm_x86_ops->cancel_injection(vcpu); kvm_x86_ops->cancel_injection(vcpu);
if (unlikely(vcpu->arch.apic_attention))
kvm_lapic_sync_from_vapic(vcpu);
out: out:
return r; return r;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment