Commit d8242d22 authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "Bugfixes, many of them reported by syzkaller and mostly predating the
  merge window"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: svm: Ensure an IBPB on all affected CPUs when freeing a vmcb
  kvm: mmu: Fix race in emulated page table writes
  KVM: nVMX: vmcs12 revision_id is always VMCS12_REVISION even when copied from eVMCS
  KVM: nVMX: Verify eVMCS revision id match supported eVMCS version on eVMCS VMPTRLD
  KVM: nVMX/nSVM: Fix bug which sets vcpu->arch.tsc_offset to L1 tsc_offset
  x86/kvm/vmx: fix old-style function declaration
  KVM: x86: fix empty-body warnings
  KVM: VMX: Update shared MSRs to be saved/restored on MSR_EFER.LMA changes
  KVM: x86: Fix kernel info-leak in KVM_HC_CLOCK_PAIRING hypercall
  KVM: nVMX: Fix kernel info-leak when enabling KVM_CAP_HYPERV_ENLIGHTENED_VMCS more than once
  svm: Add mutex_lock to protect apic_access_page_done on AMD systems
  KVM: X86: Fix scan ioapic use-before-initialization
  KVM: LAPIC: Fix pv ipis use-before-initialization
  KVM: VMX: re-add ple_gap module parameter
  KVM: PPC: Book3S HV: Fix handling for interrupted H_ENTER_NESTED
parents ef78e5ec fd65d314
@@ -983,6 +983,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 		ret = kvmhv_enter_nested_guest(vcpu);
 		if (ret == H_INTERRUPT) {
 			kvmppc_set_gpr(vcpu, 3, 0);
+			vcpu->arch.hcall_needed = 0;
 			return -EINTR;
 		}
 		break;
...
@@ -1094,7 +1094,8 @@ struct kvm_x86_ops {
 	bool (*has_wbinvd_exit)(void);
 
 	u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu);
-	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+	/* Returns actual tsc_offset set in active VMCS */
+	u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
 
 	void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
...
@@ -55,7 +55,7 @@
 #define PRIo64 "o"
 
 /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
-#define apic_debug(fmt, arg...)
+#define apic_debug(fmt, arg...) do {} while (0)
 
 /* 14 is the version for Xeon and Pentium 8.4.8*/
 #define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
@@ -576,6 +576,11 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 	rcu_read_lock();
 	map = rcu_dereference(kvm->arch.apic_map);
 
+	if (unlikely(!map)) {
+		count = -EOPNOTSUPP;
+		goto out;
+	}
+
 	if (min > map->max_apic_id)
 		goto out;
 	/* Bits above cluster_size are masked in the caller.  */
...
@@ -5074,9 +5074,9 @@ static bool need_remote_flush(u64 old, u64 new)
 }
 
 static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
-				    const u8 *new, int *bytes)
+				    int *bytes)
 {
-	u64 gentry;
+	u64 gentry = 0;
 	int r;
 
 	/*
@@ -5088,22 +5088,12 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
 		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
 		*gpa &= ~(gpa_t)7;
 		*bytes = 8;
-		r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8);
-		if (r)
-			gentry = 0;
-		new = (const u8 *)&gentry;
 	}
 
-	switch (*bytes) {
-	case 4:
-		gentry = *(const u32 *)new;
-		break;
-	case 8:
-		gentry = *(const u64 *)new;
-		break;
-	default:
-		gentry = 0;
-		break;
+	if (*bytes == 4 || *bytes == 8) {
+		r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes);
+		if (r)
+			gentry = 0;
 	}
 
 	return gentry;
@@ -5207,8 +5197,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
 
-	gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);
-
 	/*
 	 * No need to care whether allocation memory is successful
 	 * or not since pte prefetch is skiped if it does not have
@@ -5217,6 +5205,9 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	mmu_topup_memory_caches(vcpu);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
+
+	gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
+
 	++vcpu->kvm->stat.mmu_pte_write;
 	kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
...
@@ -1446,7 +1446,7 @@ static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
 	return vcpu->arch.tsc_offset;
 }
 
-static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u64 g_tsc_offset = 0;
@@ -1464,6 +1464,7 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 	svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
 
 	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
+	return svm->vmcb->control.tsc_offset;
 }
 
 static void avic_init_vmcb(struct vcpu_svm *svm)
@@ -1664,20 +1665,23 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
 static int avic_init_access_page(struct kvm_vcpu *vcpu)
 {
 	struct kvm *kvm = vcpu->kvm;
-	int ret;
+	int ret = 0;
 
+	mutex_lock(&kvm->slots_lock);
 	if (kvm->arch.apic_access_page_done)
-		return 0;
+		goto out;
 
-	ret = x86_set_memory_region(kvm,
-				    APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
-				    APIC_DEFAULT_PHYS_BASE,
-				    PAGE_SIZE);
+	ret = __x86_set_memory_region(kvm,
+				      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+				      APIC_DEFAULT_PHYS_BASE,
+				      PAGE_SIZE);
 	if (ret)
-		return ret;
+		goto out;
 
 	kvm->arch.apic_access_page_done = true;
-	return 0;
+out:
+	mutex_unlock(&kvm->slots_lock);
+	return ret;
 }
 
 static int avic_init_backing_page(struct kvm_vcpu *vcpu)
@@ -2189,21 +2193,31 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	return ERR_PTR(err);
 }
 
+static void svm_clear_current_vmcb(struct vmcb *vmcb)
+{
+	int i;
+
+	for_each_online_cpu(i)
+		cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL);
+}
+
 static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
+	/*
+	 * The vmcb page can be recycled, causing a false negative in
+	 * svm_vcpu_load(). So, ensure that no logical CPU has this
+	 * vmcb page recorded as its current vmcb.
+	 */
+	svm_clear_current_vmcb(svm->vmcb);
+
 	__free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
 	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
 	__free_page(virt_to_page(svm->nested.hsave));
 	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, svm);
-	/*
-	 * The vmcb page can be recycled, causing a false negative in
-	 * svm_vcpu_load(). So do a full IBPB now.
-	 */
-	indirect_branch_prediction_barrier();
 }
 
 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -7149,7 +7163,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.has_wbinvd_exit = svm_has_wbinvd_exit,
 
 	.read_l1_tsc_offset = svm_read_l1_tsc_offset,
-	.write_tsc_offset = svm_write_tsc_offset,
+	.write_l1_tsc_offset = svm_write_l1_tsc_offset,
 
 	.set_tdp_cr3 = set_tdp_cr3,
...
@@ -174,6 +174,7 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
  * refer SDM volume 3b section 21.6.13 & 22.1.3.
  */
 static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;
+module_param(ple_gap, uint, 0444);
 
 static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
 module_param(ple_window, uint, 0444);
@@ -984,6 +985,7 @@ struct vcpu_vmx {
 	struct shared_msr_entry *guest_msrs;
 	int                   nmsrs;
 	int                   save_nmsrs;
+	bool                  guest_msrs_dirty;
 	unsigned long         host_idt_base;
 #ifdef CONFIG_X86_64
 	u64                   msr_host_kernel_gs_base;
@@ -1306,7 +1308,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
 static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
 					    u16 error_code);
 static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
-static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
 							  u32 msr, int type);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
@@ -1610,12 +1612,6 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	/* We don't support disabling the feature for simplicity. */
-	if (vmx->nested.enlightened_vmcs_enabled)
-		return 0;
-
-	vmx->nested.enlightened_vmcs_enabled = true;
-
 	/*
 	 * vmcs_version represents the range of supported Enlightened VMCS
 	 * versions: lower 8 bits is the minimal version, higher 8 bits is the
@@ -1625,6 +1621,12 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 	if (vmcs_version)
 		*vmcs_version = (KVM_EVMCS_VERSION << 8) | 1;
 
+	/* We don't support disabling the feature for simplicity. */
+	if (vmx->nested.enlightened_vmcs_enabled)
+		return 0;
+
+	vmx->nested.enlightened_vmcs_enabled = true;
+
 	vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
 	vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
 	vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
@@ -2897,6 +2899,20 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 
 	vmx->req_immediate_exit = false;
 
+	/*
+	 * Note that guest MSRs to be saved/restored can also be changed
+	 * when guest state is loaded. This happens when guest transitions
+	 * to/from long-mode by setting MSR_EFER.LMA.
+	 */
+	if (!vmx->loaded_cpu_state || vmx->guest_msrs_dirty) {
+		vmx->guest_msrs_dirty = false;
+		for (i = 0; i < vmx->save_nmsrs; ++i)
+			kvm_set_shared_msr(vmx->guest_msrs[i].index,
+					   vmx->guest_msrs[i].data,
+					   vmx->guest_msrs[i].mask);
+	}
+
 	if (vmx->loaded_cpu_state)
 		return;
 
@@ -2957,11 +2973,6 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 		vmcs_writel(HOST_GS_BASE, gs_base);
 		host_state->gs_base = gs_base;
 	}
-
-	for (i = 0; i < vmx->save_nmsrs; ++i)
-		kvm_set_shared_msr(vmx->guest_msrs[i].index,
-				   vmx->guest_msrs[i].data,
-				   vmx->guest_msrs[i].mask);
 }
 
 static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
@@ -3436,6 +3447,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 			move_msr_up(vmx, index, save_nmsrs++);
 
 	vmx->save_nmsrs = save_nmsrs;
+	vmx->guest_msrs_dirty = true;
 
 	if (cpu_has_vmx_msr_bitmap())
 		vmx_update_msr_bitmap(&vmx->vcpu);
@@ -3452,11 +3464,9 @@ static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
 	return vcpu->arch.tsc_offset;
 }
 
-/*
- * writes 'offset' into guest's timestamp counter offset register
- */
-static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
+	u64 active_offset = offset;
 	if (is_guest_mode(vcpu)) {
 		/*
 		 * We're here if L1 chose not to trap WRMSR to TSC. According
@@ -3464,17 +3474,16 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 		 * set for L2 remains unchanged, and still needs to be added
 		 * to the newly set TSC to get L2's TSC.
 		 */
-		struct vmcs12 *vmcs12;
-		/* recalculate vmcs02.TSC_OFFSET: */
-		vmcs12 = get_vmcs12(vcpu);
-		vmcs_write64(TSC_OFFSET, offset +
-			(nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
-			 vmcs12->tsc_offset : 0));
+		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+		if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING))
+			active_offset += vmcs12->tsc_offset;
 	} else {
 		trace_kvm_write_tsc_offset(vcpu->vcpu_id,
 					   vmcs_read64(TSC_OFFSET), offset);
-		vmcs_write64(TSC_OFFSET, offset);
 	}
+
+	vmcs_write64(TSC_OFFSET, active_offset);
+	return active_offset;
 }
 
 /*
@@ -5944,7 +5953,7 @@ static void free_vpid(int vpid)
 	spin_unlock(&vmx_vpid_lock);
 }
 
-static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
 							  u32 msr, int type)
 {
 	int f = sizeof(unsigned long);
@@ -5982,7 +5991,7 @@ static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
 	}
 }
 
-static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
+static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
 							 u32 msr, int type)
 {
 	int f = sizeof(unsigned long);
@@ -6020,7 +6029,7 @@ static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
 	}
 }
 
-static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
+static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
 						      u32 msr, int type, bool value)
 {
 	if (value)
@@ -8664,8 +8673,6 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
 	struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
 	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
 
-	vmcs12->hdr.revision_id = evmcs->revision_id;
-
 	/* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
 	vmcs12->tpr_threshold = evmcs->tpr_threshold;
 	vmcs12->guest_rip = evmcs->guest_rip;
@@ -9369,7 +9376,30 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
 
 		vmx->nested.hv_evmcs = kmap(vmx->nested.hv_evmcs_page);
 
-		if (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION) {
+		/*
+		 * Currently, KVM only supports eVMCS version 1
+		 * (== KVM_EVMCS_VERSION) and thus we expect guest to set this
+		 * value to first u32 field of eVMCS which should specify eVMCS
+		 * VersionNumber.
+		 *
+		 * Guest should be aware of supported eVMCS versions by host by
+		 * examining CPUID.0x4000000A.EAX[0:15]. Host userspace VMM is
+		 * expected to set this CPUID leaf according to the value
+		 * returned in vmcs_version from nested_enable_evmcs().
+		 *
+		 * However, it turns out that Microsoft Hyper-V fails to comply
+		 * to their own invented interface: When Hyper-V use eVMCS, it
+		 * just sets first u32 field of eVMCS to revision_id specified
+		 * in MSR_IA32_VMX_BASIC. Instead of used eVMCS version number
+		 * which is one of the supported versions specified in
+		 * CPUID.0x4000000A.EAX[0:15].
+		 *
+		 * To overcome Hyper-V bug, we accept here either a supported
+		 * eVMCS version or VMCS12 revision_id as valid values for first
+		 * u32 field of eVMCS.
+		 */
+		if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) &&
+		    (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) {
 			nested_release_evmcs(vcpu);
 			return 0;
 		}
@@ -9390,9 +9420,11 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
 		 * present in struct hv_enlightened_vmcs, ...). Make sure there
 		 * are no leftovers.
 		 */
-		if (from_launch)
-			memset(vmx->nested.cached_vmcs12, 0,
-			       sizeof(*vmx->nested.cached_vmcs12));
+		if (from_launch) {
+			struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+			memset(vmcs12, 0, sizeof(*vmcs12));
+			vmcs12->hdr.revision_id = VMCS12_REVISION;
+		}
 	}
 
 	return 1;
@@ -15062,7 +15094,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
 
 	.read_l1_tsc_offset = vmx_read_l1_tsc_offset,
-	.write_tsc_offset = vmx_write_tsc_offset,
+	.write_l1_tsc_offset = vmx_write_l1_tsc_offset,
 
 	.set_tdp_cr3 = vmx_set_cr3,
...
@@ -1665,8 +1665,7 @@ EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
 
 static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
-	kvm_x86_ops->write_tsc_offset(vcpu, offset);
-	vcpu->arch.tsc_offset = offset;
+	vcpu->arch.tsc_offset = kvm_x86_ops->write_l1_tsc_offset(vcpu, offset);
 }
 
 static inline bool kvm_check_tsc_unstable(void)
@@ -1794,7 +1793,8 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc);
 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
 					   s64 adjustment)
 {
-	kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment);
+	u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
+	kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
 }
 
 static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
@@ -6918,6 +6918,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
 	clock_pairing.nsec = ts.tv_nsec;
 	clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
 	clock_pairing.flags = 0;
+	memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad));
 
 	ret = 0;
 	if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
@@ -7455,7 +7456,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 	else {
 		if (vcpu->arch.apicv_active)
 			kvm_x86_ops->sync_pir_to_irr(vcpu);
-		kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
+		if (ioapic_in_kernel(vcpu->kvm))
+			kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
 	}
 
 	if (is_guest_mode(vcpu))
...