Commit 41786cc5 authored by Paolo Bonzini

Merge tag 'kvm-x86-misc-6.12' of https://github.com/kvm-x86/linux into HEAD

KVM x86 misc changes for 6.12

 - Advertise AVX10.1 to userspace (effectively prep work for the "real" AVX10
   functionality that is on the horizon).

 - Rework common MSR handling code to suppress errors on userspace accesses to
   unsupported-but-advertised MSRs.  This will allow removing (almost?) all of
   KVM's exemptions for userspace access to MSRs that shouldn't exist based on
   the vCPU model (the actual cleanup is non-trivial future work).

 - Rework KVM's handling of x2APIC ICR, again, because AMD (x2AVIC) splits the
   64-bit value into the legacy ICR and ICR2 storage, whereas Intel (APICv)
   stores the entire 64-bit value at the ICR offset.

 - Fix a bug where KVM would fail to exit to userspace if such an exit was
   requested by a fastpath exit handler.

 - Add fastpath handling of HLT VM-Exit to expedite re-entering the guest when
   there's already a pending wake event at the time of the exit.

 - Finally fix the RSM vs. nested VM-Enter WARN by forcing the vCPU out of
   guest mode prior to signalling SHUTDOWN (architecturally, the SHUTDOWN is
   supposed to hit L1, not L2).
parents 7056c4e2 4ca077f2
...@@ -179,6 +179,7 @@ static __always_inline bool cpuid_function_is_indexed(u32 function) ...@@ -179,6 +179,7 @@ static __always_inline bool cpuid_function_is_indexed(u32 function)
case 0x1d: case 0x1d:
case 0x1e: case 0x1e:
case 0x1f: case 0x1f:
case 0x24:
case 0x8000001d: case 0x8000001d:
return true; return true;
} }
......
...@@ -125,7 +125,7 @@ KVM_X86_OP_OPTIONAL(mem_enc_unregister_region) ...@@ -125,7 +125,7 @@ KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from) KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from)
KVM_X86_OP_OPTIONAL(vm_move_enc_context_from) KVM_X86_OP_OPTIONAL(vm_move_enc_context_from)
KVM_X86_OP_OPTIONAL(guest_memory_reclaimed) KVM_X86_OP_OPTIONAL(guest_memory_reclaimed)
KVM_X86_OP(get_msr_feature) KVM_X86_OP(get_feature_msr)
KVM_X86_OP(check_emulate_instruction) KVM_X86_OP(check_emulate_instruction)
KVM_X86_OP(apic_init_signal_blocked) KVM_X86_OP(apic_init_signal_blocked)
KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush) KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush)
......
...@@ -212,6 +212,7 @@ enum exit_fastpath_completion { ...@@ -212,6 +212,7 @@ enum exit_fastpath_completion {
EXIT_FASTPATH_NONE, EXIT_FASTPATH_NONE,
EXIT_FASTPATH_REENTER_GUEST, EXIT_FASTPATH_REENTER_GUEST,
EXIT_FASTPATH_EXIT_HANDLED, EXIT_FASTPATH_EXIT_HANDLED,
EXIT_FASTPATH_EXIT_USERSPACE,
}; };
typedef enum exit_fastpath_completion fastpath_t; typedef enum exit_fastpath_completion fastpath_t;
...@@ -1730,6 +1731,8 @@ struct kvm_x86_ops { ...@@ -1730,6 +1731,8 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
const bool x2apic_icr_is_split;
const unsigned long required_apicv_inhibits; const unsigned long required_apicv_inhibits;
bool allow_apicv_in_x2apic_without_x2apic_virtualization; bool allow_apicv_in_x2apic_without_x2apic_virtualization;
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
...@@ -1809,7 +1812,7 @@ struct kvm_x86_ops { ...@@ -1809,7 +1812,7 @@ struct kvm_x86_ops {
int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd); int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
void (*guest_memory_reclaimed)(struct kvm *kvm); void (*guest_memory_reclaimed)(struct kvm *kvm);
int (*get_msr_feature)(struct kvm_msr_entry *entry); int (*get_feature_msr)(u32 msr, u64 *data);
int (*check_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type, int (*check_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type,
void *insn, int insn_len); void *insn, int insn_len);
......
...@@ -705,7 +705,7 @@ void kvm_set_cpu_caps(void) ...@@ -705,7 +705,7 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX, kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI) | F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI) |
F(AMX_COMPLEX) F(AMX_COMPLEX) | F(AVX10)
); );
kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX, kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX,
...@@ -721,6 +721,10 @@ void kvm_set_cpu_caps(void) ...@@ -721,6 +721,10 @@ void kvm_set_cpu_caps(void)
SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA) SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA)
); );
kvm_cpu_cap_init_kvm_defined(CPUID_24_0_EBX,
F(AVX10_128) | F(AVX10_256) | F(AVX10_512)
);
kvm_cpu_cap_mask(CPUID_8000_0001_ECX, kvm_cpu_cap_mask(CPUID_8000_0001_ECX,
F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
...@@ -949,7 +953,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) ...@@ -949,7 +953,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
switch (function) { switch (function) {
case 0: case 0:
/* Limited to the highest leaf implemented in KVM. */ /* Limited to the highest leaf implemented in KVM. */
entry->eax = min(entry->eax, 0x1fU); entry->eax = min(entry->eax, 0x24U);
break; break;
case 1: case 1:
cpuid_entry_override(entry, CPUID_1_EDX); cpuid_entry_override(entry, CPUID_1_EDX);
...@@ -1174,6 +1178,28 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) ...@@ -1174,6 +1178,28 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
break; break;
} }
break; break;
case 0x24: {
u8 avx10_version;
if (!kvm_cpu_cap_has(X86_FEATURE_AVX10)) {
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
break;
}
/*
* The AVX10 version is encoded in EBX[7:0]. Note, the version
* is guaranteed to be >=1 if AVX10 is supported. Note #2, the
* version needs to be captured before overriding EBX features!
*/
avx10_version = min_t(u8, entry->ebx & 0xff, 1);
cpuid_entry_override(entry, CPUID_24_0_EBX);
entry->ebx |= avx10_version;
entry->eax = 0;
entry->ecx = 0;
entry->edx = 0;
break;
}
case KVM_CPUID_SIGNATURE: { case KVM_CPUID_SIGNATURE: {
const u32 *sigptr = (const u32 *)KVM_SIGNATURE; const u32 *sigptr = (const u32 *)KVM_SIGNATURE;
entry->eax = KVM_CPUID_FEATURES; entry->eax = KVM_CPUID_FEATURES;
......
...@@ -1944,7 +1944,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) ...@@ -1944,7 +1944,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
u64 ns = 0; u64 ns = 0;
ktime_t expire; ktime_t expire;
struct kvm_vcpu *vcpu = apic->vcpu; struct kvm_vcpu *vcpu = apic->vcpu;
unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz; u32 this_tsc_khz = vcpu->arch.virtual_tsc_khz;
unsigned long flags; unsigned long flags;
ktime_t now; ktime_t now;
...@@ -2453,6 +2453,43 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) ...@@ -2453,6 +2453,43 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
} }
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
#define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13))

/*
 * Emulate a write of @data to the x2APIC ICR.
 *
 * Returns 1 without side effects if @data has any bit of
 * X2APIC_ICR_RESERVED_BITS set.  Otherwise sends the IPI described by
 * @data, records the value in the vAPIC register storage, emits the
 * APIC-write tracepoint and returns 0.
 */
int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
{
	u32 icr_lo, icr_hi;

	if ((data & X2APIC_ICR_RESERVED_BITS) != 0)
		return 1;

	/*
	 * BUSY is a reserved bit in x2APIC mode on both Intel and AMD.  AMD
	 * demands that it be zero whereas Intel effectively ignores it, and
	 * when IPI virtualization (Intel) or x2AVIC (AMD) is enabled it is
	 * the CPU that performs the reserved-bit checks, so hardware behavior
	 * "wins" regardless.  There is no sane way to be consistent with
	 * hardware here, so arbitrarily drop the bit.
	 */
	data &= ~APIC_ICR_BUSY;

	icr_lo = (u32)data;
	icr_hi = (u32)(data >> 32);

	kvm_apic_send_ipi(apic, icr_lo, icr_hi);

	if (!kvm_x86_ops.x2apic_icr_is_split) {
		/* Single 64-bit value stored at the ICR offset. */
		kvm_lapic_set_reg64(apic, APIC_ICR, data);
	} else {
		/* Low half in the legacy ICR slot, high half in ICR2. */
		kvm_lapic_set_reg(apic, APIC_ICR, icr_lo);
		kvm_lapic_set_reg(apic, APIC_ICR2, icr_hi);
	}

	trace_kvm_apic_write(APIC_ICR, data);
	return 0;
}
static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic)
{
if (kvm_x86_ops.x2apic_icr_is_split)
return (u64)kvm_lapic_get_reg(apic, APIC_ICR) |
(u64)kvm_lapic_get_reg(apic, APIC_ICR2) << 32;
return kvm_lapic_get_reg64(apic, APIC_ICR);
}
/* emulate APIC access in a trap manner */ /* emulate APIC access in a trap manner */
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
{ {
...@@ -2470,7 +2507,7 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) ...@@ -2470,7 +2507,7 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
* maybe-unnecessary write, and both are in the noise anyways. * maybe-unnecessary write, and both are in the noise anyways.
*/ */
if (apic_x2apic_mode(apic) && offset == APIC_ICR) if (apic_x2apic_mode(apic) && offset == APIC_ICR)
kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR)); WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_x2apic_icr_read(apic)));
else else
kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset)); kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
} }
...@@ -2990,12 +3027,15 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, ...@@ -2990,12 +3027,15 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
/* /*
* In x2APIC mode, the LDR is fixed and based on the id. And * In x2APIC mode, the LDR is fixed and based on the id. And
* ICR is internally a single 64-bit register, but needs to be * if the ICR is _not_ split, ICR is internally a single 64-bit
* split to ICR+ICR2 in userspace for backwards compatibility. * register, but needs to be split to ICR+ICR2 in userspace for
* backwards compatibility.
*/ */
if (set) { if (set)
*ldr = kvm_apic_calc_x2apic_ldr(x2apic_id); *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id);
if (!kvm_x86_ops.x2apic_icr_is_split) {
if (set) {
icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
(u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
__kvm_lapic_set_reg64(s->regs, APIC_ICR, icr); __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
...@@ -3004,6 +3044,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, ...@@ -3004,6 +3044,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
__kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
} }
} }
}
return 0; return 0;
} }
...@@ -3194,22 +3235,12 @@ int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) ...@@ -3194,22 +3235,12 @@ int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
return 0; return 0;
} }
int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
{
data &= ~APIC_ICR_BUSY;
kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
kvm_lapic_set_reg64(apic, APIC_ICR, data);
trace_kvm_apic_write(APIC_ICR, data);
return 0;
}
static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data) static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
{ {
u32 low; u32 low;
if (reg == APIC_ICR) { if (reg == APIC_ICR) {
*data = kvm_lapic_get_reg64(apic, APIC_ICR); *data = kvm_x2apic_icr_read(apic);
return 0; return 0;
} }
......
...@@ -96,7 +96,6 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); ...@@ -96,7 +96,6 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu); void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
void kvm_recalculate_apic_map(struct kvm *kvm); void kvm_recalculate_apic_map(struct kvm *kvm);
void kvm_apic_set_version(struct kvm_vcpu *vcpu); void kvm_apic_set_version(struct kvm_vcpu *vcpu);
void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu); void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu);
......
...@@ -223,8 +223,6 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, ...@@ -223,8 +223,6 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
bool kvm_mmu_may_ignore_guest_pat(void); bool kvm_mmu_may_ignore_guest_pat(void);
int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
int kvm_mmu_post_init_vm(struct kvm *kvm); int kvm_mmu_post_init_vm(struct kvm *kvm);
void kvm_mmu_pre_destroy_vm(struct kvm *kvm); void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
......
...@@ -349,8 +349,6 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm, ...@@ -349,8 +349,6 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault); void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level); void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level);
void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp); void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp); void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
......
...@@ -17,6 +17,7 @@ enum kvm_only_cpuid_leafs { ...@@ -17,6 +17,7 @@ enum kvm_only_cpuid_leafs {
CPUID_8000_0007_EDX, CPUID_8000_0007_EDX,
CPUID_8000_0022_EAX, CPUID_8000_0022_EAX,
CPUID_7_2_EDX, CPUID_7_2_EDX,
CPUID_24_0_EBX,
NR_KVM_CPU_CAPS, NR_KVM_CPU_CAPS,
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
...@@ -46,6 +47,7 @@ enum kvm_only_cpuid_leafs { ...@@ -46,6 +47,7 @@ enum kvm_only_cpuid_leafs {
#define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5) #define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5)
#define X86_FEATURE_AMX_COMPLEX KVM_X86_FEATURE(CPUID_7_1_EDX, 8) #define X86_FEATURE_AMX_COMPLEX KVM_X86_FEATURE(CPUID_7_1_EDX, 8)
#define X86_FEATURE_PREFETCHITI KVM_X86_FEATURE(CPUID_7_1_EDX, 14) #define X86_FEATURE_PREFETCHITI KVM_X86_FEATURE(CPUID_7_1_EDX, 14)
#define X86_FEATURE_AVX10 KVM_X86_FEATURE(CPUID_7_1_EDX, 19)
/* Intel-defined sub-features, CPUID level 0x00000007:2 (EDX) */ /* Intel-defined sub-features, CPUID level 0x00000007:2 (EDX) */
#define X86_FEATURE_INTEL_PSFD KVM_X86_FEATURE(CPUID_7_2_EDX, 0) #define X86_FEATURE_INTEL_PSFD KVM_X86_FEATURE(CPUID_7_2_EDX, 0)
...@@ -55,6 +57,11 @@ enum kvm_only_cpuid_leafs { ...@@ -55,6 +57,11 @@ enum kvm_only_cpuid_leafs {
#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) #define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
#define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5) #define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5)
/* Intel-defined sub-features, CPUID level 0x00000024:0 (EBX) */
#define X86_FEATURE_AVX10_128 KVM_X86_FEATURE(CPUID_24_0_EBX, 16)
#define X86_FEATURE_AVX10_256 KVM_X86_FEATURE(CPUID_24_0_EBX, 17)
#define X86_FEATURE_AVX10_512 KVM_X86_FEATURE(CPUID_24_0_EBX, 18)
/* CPUID level 0x80000007 (EDX). */ /* CPUID level 0x80000007 (EDX). */
#define KVM_X86_FEATURE_CONSTANT_TSC KVM_X86_FEATURE(CPUID_8000_0007_EDX, 8) #define KVM_X86_FEATURE_CONSTANT_TSC KVM_X86_FEATURE(CPUID_8000_0007_EDX, 8)
...@@ -90,6 +97,7 @@ static const struct cpuid_reg reverse_cpuid[] = { ...@@ -90,6 +97,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX}, [CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX},
[CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX}, [CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX},
[CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX},
[CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX},
}; };
/* /*
......
...@@ -624,17 +624,31 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) ...@@ -624,17 +624,31 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
#endif #endif
/* /*
* Give leave_smm() a chance to make ISA-specific changes to the vCPU * FIXME: When resuming L2 (a.k.a. guest mode), the transition to guest
* state (e.g. enter guest mode) before loading state from the SMM * mode should happen _after_ loading state from SMRAM. However, KVM
* state-save area. * piggybacks the nested VM-Enter flows (which is wrong for many other
* reasons), and so nSVM/nVMX would clobber state that is loaded from
* SMRAM and from the VMCS/VMCB.
*/ */
if (kvm_x86_call(leave_smm)(vcpu, &smram)) if (kvm_x86_call(leave_smm)(vcpu, &smram))
return X86EMUL_UNHANDLEABLE; return X86EMUL_UNHANDLEABLE;
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
return rsm_load_state_64(ctxt, &smram.smram64); ret = rsm_load_state_64(ctxt, &smram.smram64);
else else
#endif #endif
return rsm_load_state_32(ctxt, &smram.smram32); ret = rsm_load_state_32(ctxt, &smram.smram32);
/*
* If RSM fails and triggers shutdown, architecturally the shutdown
* occurs *before* the transition to guest mode. But due to KVM's
* flawed handling of RSM to L2 (see above), the vCPU may already be
* in_guest_mode(). Force the vCPU out of guest mode before delivering
* the shutdown, so that L1 enters shutdown instead of seeing a VM-Exit
* that architecturally shouldn't be possible.
*/
if (ret != X86EMUL_CONTINUE && is_guest_mode(vcpu))
kvm_leave_nested(vcpu);
return ret;
} }
...@@ -2825,17 +2825,17 @@ static int efer_trap(struct kvm_vcpu *vcpu) ...@@ -2825,17 +2825,17 @@ static int efer_trap(struct kvm_vcpu *vcpu)
return kvm_complete_insn_gp(vcpu, ret); return kvm_complete_insn_gp(vcpu, ret);
} }
static int svm_get_msr_feature(struct kvm_msr_entry *msr) static int svm_get_feature_msr(u32 msr, u64 *data)
{ {
msr->data = 0; *data = 0;
switch (msr->index) { switch (msr) {
case MSR_AMD64_DE_CFG: case MSR_AMD64_DE_CFG:
if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC)) if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC))
msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE; *data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE;
break; break;
default: default:
return KVM_MSR_RET_INVALID; return KVM_MSR_RET_UNSUPPORTED;
} }
return 0; return 0;
...@@ -3191,18 +3191,21 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) ...@@ -3191,18 +3191,21 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
kvm_pr_unimpl_wrmsr(vcpu, ecx, data); kvm_pr_unimpl_wrmsr(vcpu, ecx, data);
break; break;
case MSR_AMD64_DE_CFG: { case MSR_AMD64_DE_CFG: {
struct kvm_msr_entry msr_entry; u64 supported_de_cfg;
msr_entry.index = msr->index; if (svm_get_feature_msr(ecx, &supported_de_cfg))
if (svm_get_msr_feature(&msr_entry))
return 1; return 1;
/* Check the supported bits */ if (data & ~supported_de_cfg)
if (data & ~msr_entry.data)
return 1; return 1;
/* Don't allow the guest to change a bit, #GP */ /*
if (!msr->host_initiated && (data ^ msr_entry.data)) * Don't let the guest change the host-programmed value. The
* MSR is very model specific, i.e. contains multiple bits that
* are completely unknown to KVM, and the one bit known to KVM
* is simply a reflection of hardware capabilities.
*/
if (!msr->host_initiated && data != svm->msr_decfg)
return 1; return 1;
svm->msr_decfg = data; svm->msr_decfg = data;
...@@ -4156,12 +4159,21 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu) ...@@ -4156,12 +4159,21 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{ {
struct vcpu_svm *svm = to_svm(vcpu);
if (is_guest_mode(vcpu)) if (is_guest_mode(vcpu))
return EXIT_FASTPATH_NONE; return EXIT_FASTPATH_NONE;
if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && switch (svm->vmcb->control.exit_code) {
to_svm(vcpu)->vmcb->control.exit_info_1) case SVM_EXIT_MSR:
if (!svm->vmcb->control.exit_info_1)
break;
return handle_fastpath_set_msr_irqoff(vcpu); return handle_fastpath_set_msr_irqoff(vcpu);
case SVM_EXIT_HLT:
return handle_fastpath_hlt(vcpu);
default:
break;
}
return EXIT_FASTPATH_NONE; return EXIT_FASTPATH_NONE;
} }
...@@ -5012,7 +5024,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { ...@@ -5012,7 +5024,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.vcpu_unblocking = avic_vcpu_unblocking, .vcpu_unblocking = avic_vcpu_unblocking,
.update_exception_bitmap = svm_update_exception_bitmap, .update_exception_bitmap = svm_update_exception_bitmap,
.get_msr_feature = svm_get_msr_feature, .get_feature_msr = svm_get_feature_msr,
.get_msr = svm_get_msr, .get_msr = svm_get_msr,
.set_msr = svm_set_msr, .set_msr = svm_set_msr,
.get_segment_base = svm_get_segment_base, .get_segment_base = svm_get_segment_base,
...@@ -5063,6 +5075,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { ...@@ -5063,6 +5075,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.enable_nmi_window = svm_enable_nmi_window, .enable_nmi_window = svm_enable_nmi_window,
.enable_irq_window = svm_enable_irq_window, .enable_irq_window = svm_enable_irq_window,
.update_cr8_intercept = svm_update_cr8_intercept, .update_cr8_intercept = svm_update_cr8_intercept,
.x2apic_icr_is_split = true,
.set_virtual_apic_mode = avic_refresh_virtual_apic_mode, .set_virtual_apic_mode = avic_refresh_virtual_apic_mode,
.refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl, .refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl,
.apicv_post_state_restore = avic_apicv_post_state_restore, .apicv_post_state_restore = avic_apicv_post_state_restore,
......
...@@ -43,7 +43,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = { ...@@ -43,7 +43,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.vcpu_put = vmx_vcpu_put, .vcpu_put = vmx_vcpu_put,
.update_exception_bitmap = vmx_update_exception_bitmap, .update_exception_bitmap = vmx_update_exception_bitmap,
.get_msr_feature = vmx_get_msr_feature, .get_feature_msr = vmx_get_feature_msr,
.get_msr = vmx_get_msr, .get_msr = vmx_get_msr,
.set_msr = vmx_set_msr, .set_msr = vmx_set_msr,
.get_segment_base = vmx_get_segment_base, .get_segment_base = vmx_get_segment_base,
...@@ -91,6 +91,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = { ...@@ -91,6 +91,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.enable_nmi_window = vmx_enable_nmi_window, .enable_nmi_window = vmx_enable_nmi_window,
.enable_irq_window = vmx_enable_irq_window, .enable_irq_window = vmx_enable_irq_window,
.update_cr8_intercept = vmx_update_cr8_intercept, .update_cr8_intercept = vmx_update_cr8_intercept,
.x2apic_icr_is_split = false,
.set_virtual_apic_mode = vmx_set_virtual_apic_mode, .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
.set_apic_access_page_addr = vmx_set_apic_access_page_addr, .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
.refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
......
...@@ -1998,15 +1998,15 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx, ...@@ -1998,15 +1998,15 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
return !(msr->data & ~valid_bits); return !(msr->data & ~valid_bits);
} }
int vmx_get_msr_feature(struct kvm_msr_entry *msr) int vmx_get_feature_msr(u32 msr, u64 *data)
{ {
switch (msr->index) { switch (msr) {
case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!nested) if (!nested)
return 1; return 1;
return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); return vmx_get_vmx_msr(&vmcs_config.nested, msr, data);
default: default:
return KVM_MSR_RET_INVALID; return KVM_MSR_RET_UNSUPPORTED;
} }
} }
...@@ -7265,6 +7265,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu, ...@@ -7265,6 +7265,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu,
return handle_fastpath_set_msr_irqoff(vcpu); return handle_fastpath_set_msr_irqoff(vcpu);
case EXIT_REASON_PREEMPTION_TIMER: case EXIT_REASON_PREEMPTION_TIMER:
return handle_fastpath_preemption_timer(vcpu, force_immediate_exit); return handle_fastpath_preemption_timer(vcpu, force_immediate_exit);
case EXIT_REASON_HLT:
return handle_fastpath_hlt(vcpu);
default: default:
return EXIT_FASTPATH_NONE; return EXIT_FASTPATH_NONE;
} }
......
...@@ -17,10 +17,6 @@ ...@@ -17,10 +17,6 @@
#include "run_flags.h" #include "run_flags.h"
#include "../mmu.h" #include "../mmu.h"
#define MSR_TYPE_R 1
#define MSR_TYPE_W 2
#define MSR_TYPE_RW 3
#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) #define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
......
...@@ -57,7 +57,7 @@ bool vmx_has_emulated_msr(struct kvm *kvm, u32 index); ...@@ -57,7 +57,7 @@ bool vmx_has_emulated_msr(struct kvm *kvm, u32 index);
void vmx_msr_filter_changed(struct kvm_vcpu *vcpu); void vmx_msr_filter_changed(struct kvm_vcpu *vcpu);
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu); void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
int vmx_get_msr_feature(struct kvm_msr_entry *msr); int vmx_get_feature_msr(u32 msr, u64 *data);
int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg); u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg);
void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
......
...@@ -305,24 +305,237 @@ const struct kvm_stats_header kvm_vcpu_stats_header = { ...@@ -305,24 +305,237 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
static struct kmem_cache *x86_emulator_cache; static struct kmem_cache *x86_emulator_cache;
/* /*
* When called, it means the previous get/set msr reached an invalid msr. * The three MSR lists (msrs_to_save, emulated_msrs, msr_based_features) track
* Return true if we want to ignore/silent this failed msr access. * the set of MSRs that KVM exposes to userspace through KVM_GET_MSRS,
* KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. msrs_to_save holds MSRs that
* require host support, i.e. should be probed via RDMSR. emulated_msrs holds
* MSRs that KVM emulates without strictly requiring host support.
* msr_based_features holds MSRs that enumerate features, i.e. are effectively
* CPUID leafs. Note, msr_based_features isn't mutually exclusive with
* msrs_to_save and emulated_msrs.
*/
/*
 * Non-PMU MSR indices that contribute to the capacity of msrs_to_save[]
 * (which is sized as this table plus msrs_to_save_pmu).  The entries
 * guarded by CONFIG_X86_64 are only compiled in on 64-bit builds.
 * NOTE(review): presumably filtered against host support when
 * msrs_to_save[] is populated; the init code is not visible here.
 */
static const u32 msrs_to_save_base[] = {
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
MSR_STAR,
#ifdef CONFIG_X86_64
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
MSR_IA32_SPEC_CTRL, MSR_IA32_TSX_CTRL,
MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
MSR_IA32_UMWAIT_CONTROL,
MSR_IA32_XFD, MSR_IA32_XFD_ERR,
};
/*
 * Performance-monitoring MSR indices that, together with
 * msrs_to_save_base, determine the capacity of msrs_to_save[].  Counter
 * and event-select MSRs without a dedicated symbol are spelled as
 * "first MSR + n".
 */
static const u32 msrs_to_save_pmu[] = {
MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
MSR_CORE_PERF_GLOBAL_CTRL,
MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
/* This part of MSRs should match KVM_MAX_NR_INTEL_GP_COUNTERS. */
MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
/* This part of MSRs should match KVM_MAX_NR_AMD_GP_COUNTERS. */
MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
MSR_AMD64_PERF_CNTR_GLOBAL_CTL,
MSR_AMD64_PERF_CNTR_GLOBAL_STATUS,
MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
};
/*
 * Mutable list with room for every entry of msrs_to_save_base and
 * msrs_to_save_pmu.  num_msrs_to_save is the number of valid entries and
 * is the loop bound used by kvm_is_advertised_msr().
 */
static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) +
ARRAY_SIZE(msrs_to_save_pmu)];
static unsigned num_msrs_to_save;
/*
 * Every MSR index that can appear in emulated_msrs[] (which is sized from
 * this table).  The Hyper-V entries are only compiled in when
 * CONFIG_KVM_HYPERV is set.
 */
static const u32 emulated_msrs_all[] = {
MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
#ifdef CONFIG_KVM_HYPERV
HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
HV_X64_MSR_RESET,
HV_X64_MSR_VP_INDEX,
HV_X64_MSR_VP_RUNTIME,
HV_X64_MSR_SCONTROL,
HV_X64_MSR_STIMER0_CONFIG,
HV_X64_MSR_VP_ASSIST_PAGE,
HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
HV_X64_MSR_TSC_EMULATION_STATUS, HV_X64_MSR_TSC_INVARIANT_CONTROL,
HV_X64_MSR_SYNDBG_OPTIONS,
HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS,
HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER,
HV_X64_MSR_SYNDBG_PENDING_BUFFER,
#endif
MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK,
MSR_IA32_TSC_ADJUST,
MSR_IA32_TSC_DEADLINE,
MSR_IA32_ARCH_CAPABILITIES,
MSR_IA32_PERF_CAPABILITIES,
MSR_IA32_MISC_ENABLE,
MSR_IA32_MCG_STATUS,
MSR_IA32_MCG_CTL,
MSR_IA32_MCG_EXT_CTL,
MSR_IA32_SMBASE,
MSR_SMI_COUNT,
MSR_PLATFORM_INFO,
MSR_MISC_FEATURES_ENABLES,
MSR_AMD64_VIRT_SPEC_CTRL,
MSR_AMD64_TSC_RATIO,
MSR_IA32_POWER_CTL,
MSR_IA32_UCODE_REV,
/*
* KVM always supports the "true" VMX control MSRs, even if the host
* does not. The VMX MSRs as a whole are considered "emulated" as KVM
* doesn't strictly require them to exist in the host (ignoring that
* KVM would refuse to load in the first place if the core set of MSRs
* aren't supported).
*/
MSR_IA32_VMX_BASIC,
MSR_IA32_VMX_TRUE_PINBASED_CTLS,
MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
MSR_IA32_VMX_TRUE_EXIT_CTLS,
MSR_IA32_VMX_TRUE_ENTRY_CTLS,
MSR_IA32_VMX_MISC,
MSR_IA32_VMX_CR0_FIXED0,
MSR_IA32_VMX_CR4_FIXED0,
MSR_IA32_VMX_VMCS_ENUM,
MSR_IA32_VMX_PROCBASED_CTLS2,
MSR_IA32_VMX_EPT_VPID_CAP,
MSR_IA32_VMX_VMFUNC,
MSR_K7_HWCR,
MSR_KVM_POLL_CONTROL,
};
/*
 * Mutable list with one slot per entry of emulated_msrs_all.
 * num_emulated_msrs is the number of valid entries and is the loop bound
 * used by kvm_is_advertised_msr().
 */
static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
static unsigned num_emulated_msrs;
/*
* List of MSRs that control the existence of MSR-based features, i.e. MSRs
* that are effectively CPUID leafs. VMX MSRs are also included in the set of
* feature MSRs, but are handled separately to allow expedited lookups.
*/
/* Non-VMX feature MSRs; scanned linearly by kvm_is_immutable_feature_msr(). */
static const u32 msr_based_features_all_except_vmx[] = {
MSR_AMD64_DE_CFG,
MSR_IA32_UCODE_REV,
MSR_IA32_ARCH_CAPABILITIES,
MSR_IA32_PERF_CAPABILITIES,
};
/*
 * Mutable list sized to hold every non-VMX feature MSR plus one slot for
 * each index in the inclusive range KVM_FIRST_EMULATED_VMX_MSR ..
 * KVM_LAST_EMULATED_VMX_MSR.
 * NOTE(review): num_msr_based_features presumably counts the valid
 * entries; the code that fills the list is not visible here.
 */
static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) +
(KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)];
static unsigned int num_msr_based_features;
/*
* All feature MSRs except uCode revID, which tracks the currently loaded uCode
* patch, are immutable once the vCPU model is defined.
*/ */
static bool kvm_msr_ignored_check(u32 msr, u64 data, bool write) static bool kvm_is_immutable_feature_msr(u32 msr)
{ {
const char *op = write ? "wrmsr" : "rdmsr"; int i;
if (ignore_msrs) { if (msr >= KVM_FIRST_EMULATED_VMX_MSR && msr <= KVM_LAST_EMULATED_VMX_MSR)
if (report_ignored_msrs)
kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n",
op, msr, data);
/* Mask the error */
return true; return true;
} else {
kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n", for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++) {
op, msr, data); if (msr == msr_based_features_all_except_vmx[i])
return msr != MSR_IA32_UCODE_REV;
}
return false; return false;
}
/*
 * Return true if @msr_index is present in either of the populated MSR lists,
 * i.e. within the first num_msrs_to_save entries of msrs_to_save or the first
 * num_emulated_msrs entries of emulated_msrs.
 */
static bool kvm_is_advertised_msr(u32 msr_index)
{
	const u32 *cur;

	for (cur = msrs_to_save; cur < msrs_to_save + num_msrs_to_save; cur++) {
		if (*cur == msr_index)
			return true;
	}

	for (cur = emulated_msrs; cur < emulated_msrs + num_emulated_msrs; cur++) {
		if (*cur == msr_index)
			return true;
	}

	return false;
}
/*
 * Common signature for the MSR accessors handed to kvm_do_msr_access().
 * @data is passed by pointer so a single type can serve both accessors that
 * fill in a value and accessors that consume one.
 */
typedef int (*msr_access_t)(struct kvm_vcpu *vcpu, u32 index, u64 *data,
bool host_initiated);
/*
 * Run @msr_access_fn for @msr and apply the common policy for accesses that
 * come back as KVM_MSR_RET_UNSUPPORTED.
 *
 * @rw must be exactly MSR_TYPE_R or MSR_TYPE_W (enforced by BUILD_BUG_ON).
 *
 * Return: the accessor's result for anything other than
 * KVM_MSR_RET_UNSUPPORTED.  For unsupported MSRs: 0 if the access is
 * host-initiated, *@data is zero and the MSR is advertised; otherwise the
 * original error when ignore_msrs is clear, or 0 (optionally logged via
 * report_ignored_msrs) when ignore_msrs is set.
 */
static __always_inline int kvm_do_msr_access(struct kvm_vcpu *vcpu, u32 msr,
u64 *data, bool host_initiated,
enum kvm_msr_access rw,
msr_access_t msr_access_fn)
{
const char *op = rw == MSR_TYPE_W ? "wrmsr" : "rdmsr";
int ret;
BUILD_BUG_ON(rw != MSR_TYPE_R && rw != MSR_TYPE_W);
/*
 * Zero the data on read failures to avoid leaking stack data to the
 * guest and/or userspace, e.g. if the failure is ignored below.
 */
ret = msr_access_fn(vcpu, msr, data, host_initiated);
if (ret && rw == MSR_TYPE_R)
*data = 0;
if (ret != KVM_MSR_RET_UNSUPPORTED)
return ret;
/*
 * Userspace is allowed to read MSRs, and write '0' to MSRs, that KVM
 * advertises to userspace, even if an MSR isn't fully supported.
 * Simply check that @data is '0', which covers both the write '0' case
 * and all reads (in which case @data is zeroed on failure; see above).
 */
if (host_initiated && !*data && kvm_is_advertised_msr(msr))
return 0;
if (!ignore_msrs) {
kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n",
op, msr, *data);
return ret;
} }
if (report_ignored_msrs)
kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n", op, msr, *data);
return 0;
} }
static struct kmem_cache *kvm_alloc_emulator_cache(void) static struct kmem_cache *kvm_alloc_emulator_cache(void)
...@@ -413,8 +626,7 @@ EXPORT_SYMBOL_GPL(kvm_find_user_return_msr); ...@@ -413,8 +626,7 @@ EXPORT_SYMBOL_GPL(kvm_find_user_return_msr);
static void kvm_user_return_msr_cpu_online(void) static void kvm_user_return_msr_cpu_online(void)
{ {
unsigned int cpu = smp_processor_id(); struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs);
struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
u64 value; u64 value;
int i; int i;
...@@ -621,12 +833,6 @@ static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vecto ...@@ -621,12 +833,6 @@ static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vecto
ex->payload = payload; ex->payload = payload;
} }
/*
 * Forcibly leave the nested mode in cases like a vCPU reset.  Dispatches
 * straight to the nested_ops->leave_nested() hook; the hook pointer is not
 * NULL-checked here.
 */
static void kvm_leave_nested(struct kvm_vcpu *vcpu)
{
kvm_x86_ops.nested_ops->leave_nested(vcpu);
}
static void kvm_multiple_exception(struct kvm_vcpu *vcpu, static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
unsigned nr, bool has_error, u32 error_code, unsigned nr, bool has_error, u32 error_code,
bool has_payload, unsigned long payload, bool reinject) bool has_payload, unsigned long payload, bool reinject)
...@@ -1411,178 +1617,6 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu) ...@@ -1411,178 +1617,6 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
} }
EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc); EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc);
/*
* The three MSR lists (msrs_to_save, emulated_msrs, msr_based_features) track
* the set of MSRs that KVM exposes to userspace through KVM_GET_MSRS,
* KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. msrs_to_save holds MSRs that
* require host support, i.e. should be probed via RDMSR. emulated_msrs holds
* MSRs that KVM emulates without strictly requiring host support.
* msr_based_features holds MSRs that enumerate features, i.e. are effectively
* CPUID leafs. Note, msr_based_features isn't mutually exclusive with
* msrs_to_save and emulated_msrs.
*/
/*
 * Non-PMU candidate MSRs for msrs_to_save.  The CSTAR, KERNEL_GS_BASE,
 * SYSCALL_MASK and LSTAR entries are only compiled in on CONFIG_X86_64 builds.
 */
static const u32 msrs_to_save_base[] = {
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
MSR_STAR,
#ifdef CONFIG_X86_64
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
MSR_IA32_SPEC_CTRL, MSR_IA32_TSX_CTRL,
MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
MSR_IA32_UMWAIT_CONTROL,
MSR_IA32_XFD, MSR_IA32_XFD_ERR,
};
/*
 * PMU candidate MSRs for msrs_to_save, kept apart from msrs_to_save_base.
 * The counter/event-select runs are written out explicitly, so their length
 * has to be kept in step with the KVM_MAX_NR_*_GP_COUNTERS limits named in
 * the inline comments below.
 */
static const u32 msrs_to_save_pmu[] = {
MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
MSR_CORE_PERF_GLOBAL_CTRL,
MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
/* This part of MSRs should match KVM_MAX_NR_INTEL_GP_COUNTERS. */
MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
/* This part of MSRs should match KVM_MAX_NR_AMD_GP_COUNTERS. */
MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
MSR_AMD64_PERF_CNTR_GLOBAL_CTL,
MSR_AMD64_PERF_CNTR_GLOBAL_STATUS,
MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
};
/*
 * Storage for the to-be-saved MSR list: large enough for every entry of both
 * msrs_to_save_base and msrs_to_save_pmu, with num_msrs_to_save counting the
 * slots in use.
 * NOTE(review): the code that fills this array is not part of this chunk.
 */
static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) +
ARRAY_SIZE(msrs_to_save_pmu)];
static unsigned num_msrs_to_save;
/*
 * Candidate MSRs for the emulated_msrs list.  The HV_X64_MSR_* entries are
 * only compiled in when CONFIG_KVM_HYPERV is enabled.
 */
static const u32 emulated_msrs_all[] = {
MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
#ifdef CONFIG_KVM_HYPERV
HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
HV_X64_MSR_RESET,
HV_X64_MSR_VP_INDEX,
HV_X64_MSR_VP_RUNTIME,
HV_X64_MSR_SCONTROL,
HV_X64_MSR_STIMER0_CONFIG,
HV_X64_MSR_VP_ASSIST_PAGE,
HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
HV_X64_MSR_TSC_EMULATION_STATUS, HV_X64_MSR_TSC_INVARIANT_CONTROL,
HV_X64_MSR_SYNDBG_OPTIONS,
HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS,
HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER,
HV_X64_MSR_SYNDBG_PENDING_BUFFER,
#endif
MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK,
MSR_IA32_TSC_ADJUST,
MSR_IA32_TSC_DEADLINE,
MSR_IA32_ARCH_CAPABILITIES,
MSR_IA32_PERF_CAPABILITIES,
MSR_IA32_MISC_ENABLE,
MSR_IA32_MCG_STATUS,
MSR_IA32_MCG_CTL,
MSR_IA32_MCG_EXT_CTL,
MSR_IA32_SMBASE,
MSR_SMI_COUNT,
MSR_PLATFORM_INFO,
MSR_MISC_FEATURES_ENABLES,
MSR_AMD64_VIRT_SPEC_CTRL,
MSR_AMD64_TSC_RATIO,
MSR_IA32_POWER_CTL,
MSR_IA32_UCODE_REV,
/*
 * KVM always supports the "true" VMX control MSRs, even if the host
 * does not. The VMX MSRs as a whole are considered "emulated" as KVM
 * doesn't strictly require them to exist in the host (ignoring that
 * KVM would refuse to load in the first place if the core set of MSRs
 * aren't supported).
 */
MSR_IA32_VMX_BASIC,
MSR_IA32_VMX_TRUE_PINBASED_CTLS,
MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
MSR_IA32_VMX_TRUE_EXIT_CTLS,
MSR_IA32_VMX_TRUE_ENTRY_CTLS,
MSR_IA32_VMX_MISC,
MSR_IA32_VMX_CR0_FIXED0,
MSR_IA32_VMX_CR4_FIXED0,
MSR_IA32_VMX_VMCS_ENUM,
MSR_IA32_VMX_PROCBASED_CTLS2,
MSR_IA32_VMX_EPT_VPID_CAP,
MSR_IA32_VMX_VMFUNC,
MSR_K7_HWCR,
MSR_KVM_POLL_CONTROL,
};
/*
 * Storage for the emulated MSR list: sized to hold every entry of
 * emulated_msrs_all, with num_emulated_msrs counting the slots in use.
 * NOTE(review): the code that fills this array is not part of this chunk.
 */
static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
static unsigned num_emulated_msrs;
/*
 * List of MSRs that control the existence of MSR-based features, i.e. MSRs
 * that are effectively CPUID leafs. VMX MSRs are also included in the set of
 * feature MSRs, but are handled separately to allow expedited lookups.
 */
static const u32 msr_based_features_all_except_vmx[] = {
MSR_AMD64_DE_CFG,
MSR_IA32_UCODE_REV,
MSR_IA32_ARCH_CAPABILITIES,
MSR_IA32_PERF_CAPABILITIES,
};
/*
 * Storage for the feature MSR list: one slot per non-VMX feature MSR above
 * plus one slot per index in the emulated VMX MSR range.  Entries are appended
 * by kvm_probe_feature_msr(), which increments num_msr_based_features.
 */
static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) +
(KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)];
static unsigned int num_msr_based_features;
/*
 * Tell whether @msr is a feature MSR that is immutable once the vCPU model is
 * defined.  That covers every index in the emulated VMX MSR range and every
 * entry of msr_based_features_all_except_vmx, with the single exception of
 * the uCode revision MSR, which tracks the currently loaded uCode patch.
 */
static bool kvm_is_immutable_feature_msr(u32 msr)
{
	bool is_feature_msr = false;
	int idx;

	if (KVM_FIRST_EMULATED_VMX_MSR <= msr && msr <= KVM_LAST_EMULATED_VMX_MSR)
		return true;

	for (idx = 0; idx < ARRAY_SIZE(msr_based_features_all_except_vmx); idx++) {
		if (msr_based_features_all_except_vmx[idx] == msr) {
			is_feature_msr = true;
			break;
		}
	}

	return is_feature_msr && msr != MSR_IA32_UCODE_REV;
}
/* /*
* Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM * Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM
* does not yet virtualize. These include: * does not yet virtualize. These include:
...@@ -1660,40 +1694,31 @@ static u64 kvm_get_arch_capabilities(void) ...@@ -1660,40 +1694,31 @@ static u64 kvm_get_arch_capabilities(void)
return data; return data;
} }
static int kvm_get_msr_feature(struct kvm_msr_entry *msr) static int kvm_get_feature_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
bool host_initiated)
{ {
switch (msr->index) { WARN_ON_ONCE(!host_initiated);
switch (index) {
case MSR_IA32_ARCH_CAPABILITIES: case MSR_IA32_ARCH_CAPABILITIES:
msr->data = kvm_get_arch_capabilities(); *data = kvm_get_arch_capabilities();
break; break;
case MSR_IA32_PERF_CAPABILITIES: case MSR_IA32_PERF_CAPABILITIES:
msr->data = kvm_caps.supported_perf_cap; *data = kvm_caps.supported_perf_cap;
break; break;
case MSR_IA32_UCODE_REV: case MSR_IA32_UCODE_REV:
rdmsrl_safe(msr->index, &msr->data); rdmsrl_safe(index, data);
break; break;
default: default:
return kvm_x86_call(get_msr_feature)(msr); return kvm_x86_call(get_feature_msr)(index, data);
} }
return 0; return 0;
} }
static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) static int do_get_feature_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{ {
struct kvm_msr_entry msr; return kvm_do_msr_access(vcpu, index, data, true, MSR_TYPE_R,
int r; kvm_get_feature_msr);
/* Unconditionally clear the output for simplicity */
msr.data = 0;
msr.index = index;
r = kvm_get_msr_feature(&msr);
if (r == KVM_MSR_RET_INVALID && kvm_msr_ignored_check(index, 0, false))
r = 0;
*data = msr.data;
return r;
} }
static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
...@@ -1880,16 +1905,17 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, ...@@ -1880,16 +1905,17 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
return kvm_x86_call(set_msr)(vcpu, &msr); return kvm_x86_call(set_msr)(vcpu, &msr);
} }
/*
 * Adapter that gives __kvm_set_msr() the by-pointer msr_access_t signature so
 * it can be handed to kvm_do_msr_access().  *@data is only read here.
 */
static int _kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
bool host_initiated)
{
return __kvm_set_msr(vcpu, index, *data, host_initiated);
}
static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu, static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
u32 index, u64 data, bool host_initiated) u32 index, u64 data, bool host_initiated)
{ {
int ret = __kvm_set_msr(vcpu, index, data, host_initiated); return kvm_do_msr_access(vcpu, index, &data, host_initiated, MSR_TYPE_W,
_kvm_set_msr);
if (ret == KVM_MSR_RET_INVALID)
if (kvm_msr_ignored_check(index, data, true))
ret = 0;
return ret;
} }
/* /*
...@@ -1928,16 +1954,8 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, ...@@ -1928,16 +1954,8 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
u32 index, u64 *data, bool host_initiated) u32 index, u64 *data, bool host_initiated)
{ {
int ret = __kvm_get_msr(vcpu, index, data, host_initiated); return kvm_do_msr_access(vcpu, index, data, host_initiated, MSR_TYPE_R,
__kvm_get_msr);
if (ret == KVM_MSR_RET_INVALID) {
/* Unconditionally clear *data for simplicity */
*data = 0;
if (kvm_msr_ignored_check(index, 0, false))
ret = 0;
}
return ret;
} }
static int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data) static int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data)
...@@ -1999,7 +2017,7 @@ static int complete_fast_rdmsr(struct kvm_vcpu *vcpu) ...@@ -1999,7 +2017,7 @@ static int complete_fast_rdmsr(struct kvm_vcpu *vcpu)
static u64 kvm_msr_reason(int r) static u64 kvm_msr_reason(int r)
{ {
switch (r) { switch (r) {
case KVM_MSR_RET_INVALID: case KVM_MSR_RET_UNSUPPORTED:
return KVM_MSR_EXIT_REASON_UNKNOWN; return KVM_MSR_EXIT_REASON_UNKNOWN;
case KVM_MSR_RET_FILTERED: case KVM_MSR_RET_FILTERED:
return KVM_MSR_EXIT_REASON_FILTER; return KVM_MSR_EXIT_REASON_FILTER;
...@@ -2162,31 +2180,34 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) ...@@ -2162,31 +2180,34 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
{ {
u32 msr = kvm_rcx_read(vcpu); u32 msr = kvm_rcx_read(vcpu);
u64 data; u64 data;
fastpath_t ret = EXIT_FASTPATH_NONE; fastpath_t ret;
bool handled;
kvm_vcpu_srcu_read_lock(vcpu); kvm_vcpu_srcu_read_lock(vcpu);
switch (msr) { switch (msr) {
case APIC_BASE_MSR + (APIC_ICR >> 4): case APIC_BASE_MSR + (APIC_ICR >> 4):
data = kvm_read_edx_eax(vcpu); data = kvm_read_edx_eax(vcpu);
if (!handle_fastpath_set_x2apic_icr_irqoff(vcpu, data)) { handled = !handle_fastpath_set_x2apic_icr_irqoff(vcpu, data);
kvm_skip_emulated_instruction(vcpu);
ret = EXIT_FASTPATH_EXIT_HANDLED;
}
break; break;
case MSR_IA32_TSC_DEADLINE: case MSR_IA32_TSC_DEADLINE:
data = kvm_read_edx_eax(vcpu); data = kvm_read_edx_eax(vcpu);
if (!handle_fastpath_set_tscdeadline(vcpu, data)) { handled = !handle_fastpath_set_tscdeadline(vcpu, data);
kvm_skip_emulated_instruction(vcpu);
ret = EXIT_FASTPATH_REENTER_GUEST;
}
break; break;
default: default:
handled = false;
break; break;
} }
if (ret != EXIT_FASTPATH_NONE) if (handled) {
if (!kvm_skip_emulated_instruction(vcpu))
ret = EXIT_FASTPATH_EXIT_USERSPACE;
else
ret = EXIT_FASTPATH_REENTER_GUEST;
trace_kvm_msr_write(msr, data); trace_kvm_msr_write(msr, data);
} else {
ret = EXIT_FASTPATH_NONE;
}
kvm_vcpu_srcu_read_unlock(vcpu); kvm_vcpu_srcu_read_unlock(vcpu);
...@@ -3746,18 +3767,6 @@ static void record_steal_time(struct kvm_vcpu *vcpu) ...@@ -3746,18 +3767,6 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa)); mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
} }
/*
 * Return true if @msr_index is one of the first num_msrs_to_save entries of
 * msrs_to_save.
 */
static bool kvm_is_msr_to_save(u32 msr_index)
{
	const u32 *cur = msrs_to_save;
	const u32 *end = msrs_to_save + num_msrs_to_save;

	while (cur < end) {
		if (*cur++ == msr_index)
			return true;
	}

	return false;
}
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{ {
u32 msr = msr_info->index; u32 msr = msr_info->index;
...@@ -4139,15 +4148,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -4139,15 +4148,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (kvm_pmu_is_valid_msr(vcpu, msr)) if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info); return kvm_pmu_set_msr(vcpu, msr_info);
/* return KVM_MSR_RET_UNSUPPORTED;
* Userspace is allowed to write '0' to MSRs that KVM reports
* as to-be-saved, even if an MSRs isn't fully supported.
*/
if (msr_info->host_initiated && !data &&
kvm_is_msr_to_save(msr))
break;
return KVM_MSR_RET_INVALID;
} }
return 0; return 0;
} }
...@@ -4498,17 +4499,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -4498,17 +4499,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info); return kvm_pmu_get_msr(vcpu, msr_info);
/* return KVM_MSR_RET_UNSUPPORTED;
* Userspace is allowed to read MSRs that KVM reports as
* to-be-saved, even if an MSR isn't fully supported.
*/
if (msr_info->host_initiated &&
kvm_is_msr_to_save(msr_info->index)) {
msr_info->data = 0;
break;
}
return KVM_MSR_RET_INVALID;
} }
return 0; return 0;
} }
...@@ -4946,7 +4937,7 @@ long kvm_arch_dev_ioctl(struct file *filp, ...@@ -4946,7 +4937,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
break; break;
} }
case KVM_GET_MSRS: case KVM_GET_MSRS:
r = msr_io(NULL, argp, do_get_msr_feature, 1); r = msr_io(NULL, argp, do_get_feature_msr, 1);
break; break;
#ifdef CONFIG_KVM_HYPERV #ifdef CONFIG_KVM_HYPERV
case KVM_GET_SUPPORTED_HV_CPUID: case KVM_GET_SUPPORTED_HV_CPUID:
...@@ -7383,11 +7374,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) ...@@ -7383,11 +7374,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
static void kvm_probe_feature_msr(u32 msr_index) static void kvm_probe_feature_msr(u32 msr_index)
{ {
struct kvm_msr_entry msr = { u64 data;
.index = msr_index,
};
if (kvm_get_msr_feature(&msr)) if (kvm_get_feature_msr(NULL, msr_index, &data, true))
return; return;
msr_based_features[num_msr_based_features++] = msr_index; msr_based_features[num_msr_based_features++] = msr_index;
...@@ -9900,76 +9889,31 @@ void kvm_x86_vendor_exit(void) ...@@ -9900,76 +9889,31 @@ void kvm_x86_vendor_exit(void)
if (hypervisor_is_type(X86_HYPER_MS_HYPERV)) if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
clear_hv_tscchange_cb(); clear_hv_tscchange_cb();
#endif #endif
kvm_lapic_exit(); kvm_lapic_exit();
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
}
#ifdef CONFIG_X86_64
pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
irq_work_sync(&pvclock_irq_work);
cancel_work_sync(&pvclock_gtod_work);
#endif
kvm_x86_call(hardware_unsetup)();
kvm_mmu_vendor_module_exit();
free_percpu(user_return_msrs);
kmem_cache_destroy(x86_emulator_cache);
#ifdef CONFIG_KVM_XEN
static_key_deferred_flush(&kvm_xen_enabled);
WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
#endif
mutex_lock(&vendor_module_lock);
kvm_x86_ops.enable_virtualization_cpu = NULL;
mutex_unlock(&vendor_module_lock);
}
EXPORT_SYMBOL_GPL(kvm_x86_vendor_exit);
/*
 * Common halt handling.  Counts the halt exit, then either records @state as
 * the vCPU's mp_state (in-kernel local APIC) and returns 1, or stores @reason
 * as the userspace exit reason and returns 0.
 */
static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason)
{
	vcpu->stat.halt_exits++;

	/*
	 * With a userspace-managed local APIC, userspace is responsible for
	 * handling wake events, so hand the halt over to it.
	 */
	if (!lapic_in_kernel(vcpu)) {
		vcpu->run->exit_reason = reason;
		return 0;
	}

	/*
	 * In-kernel local APIC: update the run state; the run loop will detect
	 * the non-runnable state and halt the vCPU.
	 */
	vcpu->arch.mp_state = state;
	return 1;
}
/*
 * Emulate HLT without calling kvm_skip_emulated_instruction() first (that is
 * what kvm_emulate_halt() adds).  Uses KVM_MP_STATE_HALTED as the halted state
 * and KVM_EXIT_HLT as the userspace exit reason, and returns
 * __kvm_emulate_halt()'s result unchanged.
 */
int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu)
{
return __kvm_emulate_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip);
/*
 * Skip the emulated instruction, then emulate the halt.  Both steps always
 * run; the result is non-zero only if both of them returned non-zero.
 */
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
	int skipped = kvm_skip_emulated_instruction(vcpu);
	int halted;

	/*
	 * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered
	 * KVM_EXIT_DEBUG here.
	 */
	halted = kvm_emulate_halt_noskip(vcpu);

	return halted && skipped;
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);
int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
{
int ret = kvm_skip_emulated_instruction(vcpu);
return __kvm_emulate_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
KVM_EXIT_AP_RESET_HOLD) && ret; cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
}
#ifdef CONFIG_X86_64
pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
irq_work_sync(&pvclock_irq_work);
cancel_work_sync(&pvclock_gtod_work);
#endif
kvm_x86_call(hardware_unsetup)();
kvm_mmu_vendor_module_exit();
free_percpu(user_return_msrs);
kmem_cache_destroy(x86_emulator_cache);
#ifdef CONFIG_KVM_XEN
static_key_deferred_flush(&kvm_xen_enabled);
WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
#endif
mutex_lock(&vendor_module_lock);
kvm_x86_ops.enable_virtualization_cpu = NULL;
mutex_unlock(&vendor_module_lock);
} }
EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold); EXPORT_SYMBOL_GPL(kvm_x86_vendor_exit);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
...@@ -11207,6 +11151,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) ...@@ -11207,6 +11151,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (vcpu->arch.apic_attention) if (vcpu->arch.apic_attention)
kvm_lapic_sync_from_vapic(vcpu); kvm_lapic_sync_from_vapic(vcpu);
if (unlikely(exit_fastpath == EXIT_FASTPATH_EXIT_USERSPACE))
return 0;
r = kvm_x86_call(handle_exit)(vcpu, exit_fastpath); r = kvm_x86_call(handle_exit)(vcpu, exit_fastpath);
return r; return r;
...@@ -11220,6 +11167,67 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) ...@@ -11220,6 +11167,67 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
return r; return r;
} }
static bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
{
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted);
}
/*
 * Report whether @vcpu has any pending event.  The sources are checked in
 * this order, returning true on the first hit: non-empty async_pf.done list,
 * pending and allowed INIT/SIPI, pv_unhalted, pending exception, NMI, SMI
 * (CONFIG_KVM_SMM only), PMI request, protected-guest-state update request,
 * allowed and pending interrupt, Hyper-V stimer, nested events while in guest
 * mode (only if the optional has_events hook exists), and Xen events.
 */
static bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
{
if (!list_empty_careful(&vcpu->async_pf.done))
return true;
if (kvm_apic_has_pending_init_or_sipi(vcpu) &&
kvm_apic_init_sipi_allowed(vcpu))
return true;
if (vcpu->arch.pv.pv_unhalted)
return true;
if (kvm_is_exception_pending(vcpu))
return true;
if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
(vcpu->arch.nmi_pending &&
kvm_x86_call(nmi_allowed)(vcpu, false)))
return true;
#ifdef CONFIG_KVM_SMM
if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
(vcpu->arch.smi_pending &&
kvm_x86_call(smi_allowed)(vcpu, false)))
return true;
#endif
if (kvm_test_request(KVM_REQ_PMI, vcpu))
return true;
if (kvm_test_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu))
return true;
if (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu))
return true;
if (kvm_hv_has_stimer_pending(vcpu))
return true;
if (is_guest_mode(vcpu) &&
kvm_x86_ops.nested_ops->has_events &&
kvm_x86_ops.nested_ops->has_events(vcpu, false))
return true;
if (kvm_xen_has_pending_events(vcpu))
return true;
return false;
}
/*
 * Return non-zero if @vcpu is running or has a pending event.  The event scan
 * is only performed when the vCPU is not already running.
 */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	if (kvm_vcpu_running(vcpu))
		return 1;

	return kvm_vcpu_has_events(vcpu);
}
/* Called within kvm->srcu read side. */ /* Called within kvm->srcu read side. */
static inline int vcpu_block(struct kvm_vcpu *vcpu) static inline int vcpu_block(struct kvm_vcpu *vcpu)
{ {
...@@ -11291,12 +11299,6 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu) ...@@ -11291,12 +11299,6 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu)
return 1; return 1;
} }
static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
{
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted);
}
/* Called within kvm->srcu read side. */ /* Called within kvm->srcu read side. */
static int vcpu_run(struct kvm_vcpu *vcpu) static int vcpu_run(struct kvm_vcpu *vcpu)
{ {
...@@ -11348,6 +11350,98 @@ static int vcpu_run(struct kvm_vcpu *vcpu) ...@@ -11348,6 +11350,98 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
return r; return r;
} }
/*
 * Common halt handling.  Counts the halt exit and then:
 *  - userspace local APIC: stores @reason as the exit reason and returns 0;
 *  - in-kernel local APIC with an event already pending: clears pv_unhalted,
 *    leaves mp_state untouched and returns 1;
 *  - in-kernel local APIC otherwise: sets mp_state to @state and returns 1.
 */
static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason)
{
	vcpu->stat.halt_exits++;

	/*
	 * With a userspace-managed local APIC, userspace is responsible for
	 * handling wake events, so hand the halt over to it.
	 */
	if (!lapic_in_kernel(vcpu)) {
		vcpu->run->exit_reason = reason;
		return 0;
	}

	/*
	 * In-kernel local APIC: update the run state unless an event is
	 * already pending; the run loop will detect a non-runnable state and
	 * halt the vCPU.
	 */
	if (!kvm_vcpu_has_events(vcpu))
		vcpu->arch.mp_state = state;
	else
		vcpu->arch.pv.pv_unhalted = false;

	return 1;
}
/*
 * Emulate HLT without calling kvm_skip_emulated_instruction() first (that is
 * what kvm_emulate_halt() adds).  Uses KVM_MP_STATE_HALTED as the halted state
 * and KVM_EXIT_HLT as the userspace exit reason, and returns
 * __kvm_emulate_halt()'s result unchanged.
 */
int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu)
{
return __kvm_emulate_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip);
/*
 * Skip the emulated instruction, then emulate the halt.  Both steps always
 * run; the result is non-zero only if both of them returned non-zero.
 */
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
	int skipped = kvm_skip_emulated_instruction(vcpu);
	int halted;

	/*
	 * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered
	 * KVM_EXIT_DEBUG here.
	 */
	halted = kvm_emulate_halt_noskip(vcpu);

	return halted && skipped;
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);
/*
 * Fastpath HLT handler.  Emulates the halt under the vCPU's SRCU read lock
 * and maps the outcome to a fastpath verdict:
 *  - kvm_emulate_halt() returned 0       -> EXIT_FASTPATH_EXIT_USERSPACE
 *  - handled and the vCPU is still running -> EXIT_FASTPATH_REENTER_GUEST
 *  - handled but no longer running        -> EXIT_FASTPATH_EXIT_HANDLED
 */
fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu)
{
	int handled_in_kernel;

	kvm_vcpu_srcu_read_lock(vcpu);
	handled_in_kernel = kvm_emulate_halt(vcpu);
	kvm_vcpu_srcu_read_unlock(vcpu);

	if (handled_in_kernel)
		return kvm_vcpu_running(vcpu) ? EXIT_FASTPATH_REENTER_GUEST :
						EXIT_FASTPATH_EXIT_HANDLED;

	return EXIT_FASTPATH_EXIT_USERSPACE;
}
EXPORT_SYMBOL_GPL(handle_fastpath_hlt);
/*
 * Skip the emulated instruction, then put the vCPU into
 * KVM_MP_STATE_AP_RESET_HOLD (or request a KVM_EXIT_AP_RESET_HOLD exit).
 * Both steps always run; the result is non-zero only if both succeeded.
 */
int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
{
	int skipped = kvm_skip_emulated_instruction(vcpu);
	int held = __kvm_emulate_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD,
				      KVM_EXIT_AP_RESET_HOLD);

	return held && skipped;
}
EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold);
bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_apicv_active(vcpu) &&
kvm_x86_call(dy_apicv_has_pending_interrupt)(vcpu);
}
/*
 * Accessor for the cached vcpu->arch.preempted_in_kernel flag.
 * NOTE(review): where the flag is set is not visible in this chunk.
 */
bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu)
{
return vcpu->arch.preempted_in_kernel;
}
/*
 * Lightweight runnable check: true if pv_unhalted is set, if an NMI, SMI
 * (CONFIG_KVM_SMM only) or EVENT request is pending, or if
 * kvm_arch_dy_has_pending_interrupt() reports an interrupt.
 */
bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
{
	if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
		return true;

	if (kvm_test_request(KVM_REQ_NMI, vcpu))
		return true;
#ifdef CONFIG_KVM_SMM
	if (kvm_test_request(KVM_REQ_SMI, vcpu))
		return true;
#endif
	if (kvm_test_request(KVM_REQ_EVENT, vcpu))
		return true;

	return kvm_arch_dy_has_pending_interrupt(vcpu);
}
static inline int complete_emulated_io(struct kvm_vcpu *vcpu) static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
{ {
return kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE); return kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
...@@ -13172,87 +13266,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, ...@@ -13172,87 +13266,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
kvm_arch_free_memslot(kvm, old); kvm_arch_free_memslot(kvm, old);
} }
/*
 * Report whether @vcpu has any pending event.  The sources are checked in
 * this order, returning true on the first hit: non-empty async_pf.done list,
 * pending and allowed INIT/SIPI, pv_unhalted, pending exception, NMI, SMI
 * (CONFIG_KVM_SMM only), PMI request, protected-guest-state update request,
 * allowed and pending interrupt, Hyper-V stimer, nested events while in guest
 * mode (only if the optional has_events hook exists), and Xen events.
 */
static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
{
if (!list_empty_careful(&vcpu->async_pf.done))
return true;
if (kvm_apic_has_pending_init_or_sipi(vcpu) &&
kvm_apic_init_sipi_allowed(vcpu))
return true;
if (vcpu->arch.pv.pv_unhalted)
return true;
if (kvm_is_exception_pending(vcpu))
return true;
if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
(vcpu->arch.nmi_pending &&
kvm_x86_call(nmi_allowed)(vcpu, false)))
return true;
#ifdef CONFIG_KVM_SMM
if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
(vcpu->arch.smi_pending &&
kvm_x86_call(smi_allowed)(vcpu, false)))
return true;
#endif
if (kvm_test_request(KVM_REQ_PMI, vcpu))
return true;
if (kvm_test_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu))
return true;
if (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu))
return true;
if (kvm_hv_has_stimer_pending(vcpu))
return true;
if (is_guest_mode(vcpu) &&
kvm_x86_ops.nested_ops->has_events &&
kvm_x86_ops.nested_ops->has_events(vcpu, false))
return true;
if (kvm_xen_has_pending_events(vcpu))
return true;
return false;
}
/*
 * Return non-zero if @vcpu is running or has a pending event.  The event scan
 * is only performed when the vCPU is not already running.
 */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	if (kvm_vcpu_running(vcpu))
		return 1;

	return kvm_vcpu_has_events(vcpu);
}
bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_apicv_active(vcpu) &&
kvm_x86_call(dy_apicv_has_pending_interrupt)(vcpu);
}
/*
 * Accessor for the cached vcpu->arch.preempted_in_kernel flag.
 * NOTE(review): where the flag is set is not visible in this chunk.
 */
bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu)
{
return vcpu->arch.preempted_in_kernel;
}
/*
 * Lightweight runnable check: true if pv_unhalted is set, if an NMI, SMI
 * (CONFIG_KVM_SMM only) or EVENT request is pending, or if
 * kvm_arch_dy_has_pending_interrupt() reports an interrupt.
 */
bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
{
	if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
		return true;

	if (kvm_test_request(KVM_REQ_NMI, vcpu))
		return true;
#ifdef CONFIG_KVM_SMM
	if (kvm_test_request(KVM_REQ_SMI, vcpu))
		return true;
#endif
	if (kvm_test_request(KVM_REQ_EVENT, vcpu))
		return true;

	return kvm_arch_dy_has_pending_interrupt(vcpu);
}
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{ {
if (vcpu->arch.guest_state_protected) if (vcpu->arch.guest_state_protected)
......
...@@ -108,6 +108,12 @@ static inline unsigned int __shrink_ple_window(unsigned int val, ...@@ -108,6 +108,12 @@ static inline unsigned int __shrink_ple_window(unsigned int val,
void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu); void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
int kvm_check_nested_events(struct kvm_vcpu *vcpu); int kvm_check_nested_events(struct kvm_vcpu *vcpu);
/*
 * Forcibly leave the nested mode in cases like a vCPU reset.  Dispatches
 * straight to the nested_ops->leave_nested() hook; the hook pointer is not
 * NULL-checked here.
 */
static inline void kvm_leave_nested(struct kvm_vcpu *vcpu)
{
kvm_x86_ops.nested_ops->leave_nested(vcpu);
}
static inline bool kvm_vcpu_has_run(struct kvm_vcpu *vcpu) static inline bool kvm_vcpu_has_run(struct kvm_vcpu *vcpu)
{ {
return vcpu->arch.last_vmentry_cpu != -1; return vcpu->arch.last_vmentry_cpu != -1;
...@@ -334,6 +340,7 @@ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type, ...@@ -334,6 +340,7 @@ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len); int emulation_type, void *insn, int insn_len);
fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu); fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu);
extern struct kvm_caps kvm_caps; extern struct kvm_caps kvm_caps;
extern struct kvm_host_values kvm_host; extern struct kvm_host_values kvm_host;
...@@ -504,13 +511,26 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, ...@@ -504,13 +511,26 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva); int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva);
bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type); bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
/*
 * Direction of an MSR access, encoded as bit flags so that MSR_TYPE_RW is
 * simply the OR of the read and write bits.
 */
enum kvm_msr_access {
MSR_TYPE_R = BIT(0),
MSR_TYPE_W = BIT(1),
MSR_TYPE_RW = MSR_TYPE_R | MSR_TYPE_W,
};
/* /*
* Internal error codes that are used to indicate that MSR emulation encountered * Internal error codes that are used to indicate that MSR emulation encountered
* an error that should result in #GP in the guest, unless userspace * an error that should result in #GP in the guest, unless userspace handles it.
* handles it. * Note, '1', '0', and negative numbers are off limits, as they are used by KVM
* as part of KVM's lightly documented internal KVM_RUN return codes.
*
* UNSUPPORTED - The MSR isn't supported, either because it is completely
* unknown to KVM, or because the MSR should not exist according
* to the vCPU model.
*
* FILTERED - Access to the MSR is denied by a userspace MSR filter.
*/ */
#define KVM_MSR_RET_INVALID 2 /* in-kernel MSR emulation #GP condition */ #define KVM_MSR_RET_UNSUPPORTED 2
#define KVM_MSR_RET_FILTERED 3 /* #GP due to userspace MSR filter */ #define KVM_MSR_RET_FILTERED 3
#define __cr4_reserved_bits(__cpu_has, __c) \ #define __cr4_reserved_bits(__cpu_has, __c) \
({ \ ({ \
......
...@@ -107,6 +107,21 @@ static void ucall_abort(const char *assert_msg, const char *expected_assert_msg) ...@@ -107,6 +107,21 @@ static void ucall_abort(const char *assert_msg, const char *expected_assert_msg)
expected_assert_msg, &assert_msg[offset]); expected_assert_msg, &assert_msg[offset]);
} }
/*
 * Open-coded variant of vcpu_run() that omits the UCALL_ABORT handling, so
 * that intentional guest asserts can be verified by the test instead of being
 * reported as failures.
 */
static void do_vcpu_run(struct kvm_vcpu *vcpu)
{
	int r;

	/* Re-issue KVM_RUN for as long as it is interrupted (EINTR). */
	for (;;) {
		r = __vcpu_run(vcpu);
		if (r != -1 || errno != EINTR)
			break;
	}

	/* Any other non-zero return is a hard test failure. */
	TEST_ASSERT(!r, KVM_IOCTL_ERROR(KVM_RUN, r));
}
static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf,
const char *expected_assert) const char *expected_assert)
{ {
...@@ -114,7 +129,7 @@ static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, ...@@ -114,7 +129,7 @@ static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf,
struct ucall uc; struct ucall uc;
while (1) { while (1) {
vcpu_run(vcpu); do_vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
"Unexpected exit reason: %u (%s),", "Unexpected exit reason: %u (%s),",
...@@ -159,7 +174,7 @@ static void test_limits(void) ...@@ -159,7 +174,7 @@ static void test_limits(void)
vm = vm_create_with_one_vcpu(&vcpu, guest_code_limits); vm = vm_create_with_one_vcpu(&vcpu, guest_code_limits);
run = vcpu->run; run = vcpu->run;
vcpu_run(vcpu); do_vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
"Unexpected exit reason: %u (%s),", "Unexpected exit reason: %u (%s),",
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include <stdint.h> #include <stdint.h>
#include "processor.h" #include "processor.h"
#include "ucall_common.h"
#define APIC_DEFAULT_GPA 0xfee00000ULL #define APIC_DEFAULT_GPA 0xfee00000ULL
...@@ -93,9 +94,27 @@ static inline uint64_t x2apic_read_reg(unsigned int reg) ...@@ -93,9 +94,27 @@ static inline uint64_t x2apic_read_reg(unsigned int reg)
return rdmsr(APIC_BASE_MSR + (reg >> 4)); return rdmsr(APIC_BASE_MSR + (reg >> 4));
} }
/*
 * Write @value to the x2APIC register identified by offset @reg, i.e. to MSR
 * APIC_BASE_MSR + (reg >> 4), and return wrmsr_safe()'s result rather than
 * asserting on it.  Callers in this header treat the result as a fault vector,
 * with zero meaning the write did not fault.
 */
static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value)
{
	const uint32_t msr = APIC_BASE_MSR + (reg >> 4);

	return wrmsr_safe(msr, value);
}
static inline void x2apic_write_reg(unsigned int reg, uint64_t value) static inline void x2apic_write_reg(unsigned int reg, uint64_t value)
{ {
wrmsr(APIC_BASE_MSR + (reg >> 4), value); uint8_t fault = x2apic_write_reg_safe(reg, value);
__GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n",
fault, APIC_BASE_MSR + (reg >> 4), value);
} }
/*
 * Write @value to the x2APIC register identified by offset @reg and assert
 * that the WRMSR faulted with #GP.  Any other outcome, including a successful
 * write, trips the guest assertion.
 */
static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value)
{
	/* MSR index, computed once; used only for the failure message. */
	const uint32_t msr = APIC_BASE_MSR + (reg >> 4);
	uint8_t fault = x2apic_write_reg_safe(reg, value);

	__GUEST_ASSERT(fault == GP_VECTOR,
		       "Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n",
		       msr, value, fault);
}
#endif /* SELFTEST_KVM_APIC_H */ #endif /* SELFTEST_KVM_APIC_H */
...@@ -566,9 +566,7 @@ void route_exception(struct ex_regs *regs) ...@@ -566,9 +566,7 @@ void route_exception(struct ex_regs *regs)
if (kvm_fixup_exception(regs)) if (kvm_fixup_exception(regs))
return; return;
ucall_assert(UCALL_UNHANDLED, GUEST_FAIL("Unhandled exception '0x%lx' at guest RIP '0x%lx'",
"Unhandled exception in guest", __FILE__, __LINE__,
"Unhandled exception '0x%lx' at guest RIP '0x%lx'",
regs->vector, regs->rip); regs->vector, regs->rip);
} }
...@@ -611,7 +609,7 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) ...@@ -611,7 +609,7 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
{ {
struct ucall uc; struct ucall uc;
if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) if (get_ucall(vcpu, &uc) == UCALL_ABORT)
REPORT_GUEST_ASSERT(uc); REPORT_GUEST_ASSERT(uc);
} }
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
struct xapic_vcpu { struct xapic_vcpu {
struct kvm_vcpu *vcpu; struct kvm_vcpu *vcpu;
bool is_x2apic; bool is_x2apic;
bool has_xavic_errata;
}; };
static void xapic_guest_code(void) static void xapic_guest_code(void)
...@@ -31,6 +32,10 @@ static void xapic_guest_code(void) ...@@ -31,6 +32,10 @@ static void xapic_guest_code(void)
} }
} }
/*
 * Mask of bits 31:20, 17:16 and 13 of the ICR value.  x2apic_guest_code()
 * expects a write of an ICR value with any of these bits set to fault with
 * #GP, and ____test_icr() skips the read-back comparison for such values.
 */
#define X2APIC_RSVD_BITS_MASK (GENMASK_ULL(31, 20) | \
GENMASK_ULL(17, 16) | \
GENMASK_ULL(13, 13))
static void x2apic_guest_code(void) static void x2apic_guest_code(void)
{ {
asm volatile("cli"); asm volatile("cli");
...@@ -41,7 +46,12 @@ static void x2apic_guest_code(void) ...@@ -41,7 +46,12 @@ static void x2apic_guest_code(void)
uint64_t val = x2apic_read_reg(APIC_IRR) | uint64_t val = x2apic_read_reg(APIC_IRR) |
x2apic_read_reg(APIC_IRR + 0x10) << 32; x2apic_read_reg(APIC_IRR + 0x10) << 32;
if (val & X2APIC_RSVD_BITS_MASK) {
x2apic_write_reg_fault(APIC_ICR, val);
} else {
x2apic_write_reg(APIC_ICR, val); x2apic_write_reg(APIC_ICR, val);
GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ICR), val);
}
GUEST_SYNC(val); GUEST_SYNC(val);
} while (1); } while (1);
} }
...@@ -71,27 +81,28 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val) ...@@ -71,27 +81,28 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) | icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) |
(u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32; (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32;
if (!x->is_x2apic) { if (!x->is_x2apic) {
if (!x->has_xavic_errata)
val &= (-1u | (0xffull << (32 + 24))); val &= (-1u | (0xffull << (32 + 24)));
TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); } else if (val & X2APIC_RSVD_BITS_MASK) {
} else { return;
TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
} }
}
#define X2APIC_RSVED_BITS_MASK (GENMASK_ULL(31,20) | \ if (x->has_xavic_errata)
GENMASK_ULL(17,16) | \ TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
GENMASK_ULL(13,13)) else
TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
}
static void __test_icr(struct xapic_vcpu *x, uint64_t val) static void __test_icr(struct xapic_vcpu *x, uint64_t val)
{ {
if (x->is_x2apic) { /*
/* Hardware writing vICR register requires reserved bits 31:20, * The BUSY bit is reserved on both AMD and Intel, but only AMD treats
* 17:16 and 13 kept as zero to avoid #GP exception. Data value * it as _must_ be zero. Intel simply ignores the bit. Don't test
* written to vICR should mask out those bits above. * the BUSY bit for x2APIC, as there is no single correct behavior.
*/ */
val &= ~X2APIC_RSVED_BITS_MASK; if (!x->is_x2apic)
}
____test_icr(x, val | APIC_ICR_BUSY); ____test_icr(x, val | APIC_ICR_BUSY);
____test_icr(x, val & ~(u64)APIC_ICR_BUSY); ____test_icr(x, val & ~(u64)APIC_ICR_BUSY);
} }
...@@ -231,6 +242,15 @@ int main(int argc, char *argv[]) ...@@ -231,6 +242,15 @@ int main(int argc, char *argv[])
vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code); vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code);
x.is_x2apic = false; x.is_x2apic = false;
/*
* AMD's AVIC implementation is buggy (fails to clear the ICR BUSY bit),
* and also diverges from KVM with respect to ICR2[23:0] (KVM and Intel
* drops writes, AMD does not). Account for the errata when checking
* that KVM reads back what was written.
*/
x.has_xavic_errata = host_cpu_is_amd &&
get_kvm_amd_param_bool("avic");
vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC); vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC);
virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment