Commit 685ed983 authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "s390:

   - PCI interpretation compile fixes

  RISC-V:

   - fix unused variable warnings in vcpu_timer.c

   - move extern sbi_ext declarations to a header

  x86:

   - check validity of argument to KVM_SET_MP_STATE

   - use guest's global_ctrl to completely disable guest PEBS

   - fix a memory leak on memory allocation failure

   - mask off unsupported and unknown bits of IA32_ARCH_CAPABILITIES

   - fix build failure with Clang integrated assembler

   - fix MSR interception

   - always flush TLBs when enabling dirty logging"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: check validity of argument to KVM_SET_MP_STATE
  perf/x86/core: Completely disable guest PEBS via guest's global_ctrl
  KVM: x86: fix memoryleak in kvm_arch_vcpu_create()
  KVM: x86: Mask off unsupported and unknown bits of IA32_ARCH_CAPABILITIES
  KVM: s390: pci: Hook to access KVM lowlevel from VFIO
  riscv: kvm: move extern sbi_ext declarations to a header
  riscv: kvm: vcpu_timer: fix unused variable warnings
  KVM: selftests: Fix ambiguous mov in KVM_ASM_SAFE()
  KVM: selftests: Fix KVM_EXCEPTION_MAGIC build with Clang
  KVM: VMX: Heed the 'msr' argument in msr_write_intercepted()
  kvm: x86: mmu: Always flush TLBs when enabling dirty logging
  kvm: x86: mmu: Drop the need_remote_flush() function
parents b0839b28 29250ba5
@@ -33,4 +33,16 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
 				     u32 type, u64 flags);
 const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid);
 
+#ifdef CONFIG_RISCV_SBI_V01
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01;
+#endif
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
+
 #endif /* __RISCV_KVM_VCPU_SBI_H__ */
@@ -32,23 +32,13 @@ static int kvm_linux_err_map_sbi(int err)
 	};
 }
 
-#ifdef CONFIG_RISCV_SBI_V01
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01;
-#else
+#ifndef CONFIG_RISCV_SBI_V01
 static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = {
 	.extid_start = -1UL,
 	.extid_end = -1UL,
 	.handler = NULL,
 };
 #endif
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
 
 static const struct kvm_vcpu_sbi_extension *sbi_ext[] = {
 	&vcpu_sbi_ext_v01,
...
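Moving the extern declarations into the header means every file that defines or uses one of these extension objects sees a single authoritative declaration, which is what compile-time checkers (e.g. sparse's "symbol was not declared" warning) want. A minimal sketch of the pattern with hypothetical names, not the kernel's code:

/* widget.h: one declaration, visible to definers and users alike. */
#ifndef WIDGET_H
#define WIDGET_H
struct widget { int id; };
extern const struct widget default_widget;
#endif

/* widget.c: exactly one definition; including the header lets the
 * compiler verify that declaration and definition agree. */
#include "widget.h"
const struct widget default_widget = { .id = 1 };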
@@ -299,7 +299,6 @@ static void kvm_riscv_vcpu_update_timedelta(struct kvm_vcpu *vcpu)
 
 void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu_csr *csr;
 	struct kvm_vcpu_timer *t = &vcpu->arch.timer;
 
 	kvm_riscv_vcpu_update_timedelta(vcpu);
@@ -307,7 +306,6 @@ void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
 	if (!t->sstc_enabled)
 		return;
 
-	csr = &vcpu->arch.guest_csr;
 #if defined(CONFIG_32BIT)
 	csr_write(CSR_VSTIMECMP, (u32)t->next_cycles);
 	csr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32));
@@ -324,13 +322,11 @@ void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
 
 void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu_csr *csr;
 	struct kvm_vcpu_timer *t = &vcpu->arch.timer;
 
 	if (!t->sstc_enabled)
 		return;
 
-	csr = &vcpu->arch.guest_csr;
 	t = &vcpu->arch.timer;
#if defined(CONFIG_32BIT)
 	t->next_cycles = csr_read(CSR_VSTIMECMP);
...
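The dropped csr locals were assigned but never read once this code writes the guest CSRs directly, which is exactly what GCC's -Wunused-but-set-variable reports. A tiny reproduction of that warning class (hypothetical names):

static int some_csr;

int timer_restore(void)
{
	int *csr;

	csr = &some_csr;	/* assigned here...                      */
	return 0;		/* ...but never read: warning triggers   */
}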
@@ -1038,16 +1038,11 @@ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 #define __KVM_HAVE_ARCH_VM_FREE
 void kvm_arch_free_vm(struct kvm *kvm);
 
-#ifdef CONFIG_VFIO_PCI_ZDEV_KVM
-int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm);
-void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev);
-#else
-static inline int kvm_s390_pci_register_kvm(struct zpci_dev *dev,
-					    struct kvm *kvm)
-{
-	return -EPERM;
-}
-static inline void kvm_s390_pci_unregister_kvm(struct zpci_dev *dev) {}
-#endif
+struct zpci_kvm_hook {
+	int (*kvm_register)(void *opaque, struct kvm *kvm);
+	void (*kvm_unregister)(void *opaque);
+};
+
+extern struct zpci_kvm_hook zpci_kvm_hook;
 
 #endif
@@ -431,8 +431,9 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
  * available, enable them and let userspace indicate whether or not they will
  * be used (specify SHM bit to disable).
  */
-int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm)
+static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
 {
+	struct zpci_dev *zdev = opaque;
 	int rc;
 
 	if (!zdev)
@@ -510,10 +511,10 @@ int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm)
 	kvm_put_kvm(kvm);
 	return rc;
 }
-EXPORT_SYMBOL_GPL(kvm_s390_pci_register_kvm);
 
-void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev)
+static void kvm_s390_pci_unregister_kvm(void *opaque)
 {
+	struct zpci_dev *zdev = opaque;
 	struct kvm *kvm;
 
 	if (!zdev)
@@ -566,7 +567,6 @@ void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev)
 	kvm_put_kvm(kvm);
 }
-EXPORT_SYMBOL_GPL(kvm_s390_pci_unregister_kvm);
 
 void kvm_s390_pci_init_list(struct kvm *kvm)
 {
@@ -678,6 +678,8 @@ int kvm_s390_pci_init(void)
 	spin_lock_init(&aift->gait_lock);
 	mutex_init(&aift->aift_lock);
 
+	zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm;
+	zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm;
+
 	return 0;
 }
@@ -685,6 +687,8 @@ int kvm_s390_pci_init(void)
 void kvm_s390_pci_exit(void)
 {
 	mutex_destroy(&aift->aift_lock);
+	zpci_kvm_hook.kvm_register = NULL;
+	zpci_kvm_hook.kvm_unregister = NULL;
 
 	kfree(aift);
 }
@@ -5,5 +5,5 @@
 obj-$(CONFIG_PCI)	+= pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
 			   pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
-			   pci_bus.o
+			   pci_bus.o pci_kvm_hook.o
 obj-$(CONFIG_PCI_IOV)	+= pci_iov.o
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VFIO ZPCI devices support
+ *
+ * Copyright (C) IBM Corp. 2022. All rights reserved.
+ * Author(s): Pierre Morel <pmorel@linux.ibm.com>
+ */
+#include <linux/kvm_host.h>
+struct zpci_kvm_hook zpci_kvm_hook;
+EXPORT_SYMBOL_GPL(zpci_kvm_hook);
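Taken together, the s390 hunks above replace a direct (and Kconfig-conditional) call into KVM with a function-pointer hook: the KVM module fills in the pointers at init time and clears them on exit, so the VFIO side can probe them at run time instead of linking against KVM symbols. A self-contained sketch of that registration pattern, with hypothetical names rather than the kernel API:

#include <stdio.h>

struct hook {
	int  (*kvm_register)(void *opaque);
	void (*kvm_unregister)(void *opaque);
};

static struct hook the_hook;	/* zero-initialized: no provider yet */

static int provider_register(void *opaque)
{
	printf("registered %p\n", opaque);
	return 0;
}

static void provider_unregister(void *opaque)
{
	printf("unregistered %p\n", opaque);
}

static void provider_init(void)	/* cf. kvm_s390_pci_init() above */
{
	the_hook.kvm_register = provider_register;
	the_hook.kvm_unregister = provider_unregister;
}

int main(void)
{
	int device;

	provider_init();
	/* Consumer side, cf. vfio_pci_zdev_open_device() below: tolerate
	 * an absent provider instead of taking a hard symbol dependency. */
	if (the_hook.kvm_register)
		the_hook.kvm_register(&device);
	if (the_hook.kvm_unregister)
		the_hook.kvm_unregister(&device);
	return 0;
}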
@@ -4052,8 +4052,9 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
 		/* Disable guest PEBS if host PEBS is enabled. */
 		arr[pebs_enable].guest = 0;
 	} else {
-		/* Disable guest PEBS for cross-mapped PEBS counters. */
+		/* Disable guest PEBS thoroughly for cross-mapped PEBS counters. */
 		arr[pebs_enable].guest &= ~kvm_pmu->host_cross_mapped_mask;
+		arr[global_ctrl].guest &= ~kvm_pmu->host_cross_mapped_mask;
 		/* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
 		arr[global_ctrl].guest |= arr[pebs_enable].guest;
 	}
...
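The one-line addition matters because clearing a cross-mapped counter's bit only in the PEBS_ENABLE image still left that counter enabled through the GLOBAL_CTRL image, so guest PEBS was not completely disabled, as the subject line says; the fix clears the same bits in both images. A worked example with illustrative values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t pebs_enable = 0x3;	/* guest asks for PEBS on counters 0,1 */
	uint64_t global_ctrl = 0x3;	/* guest enables counters 0,1 */
	uint64_t cross_mapped = 0x2;	/* counter 1 is cross-mapped on the host */

	pebs_enable &= ~cross_mapped;	/* the pre-fix code stopped here...     */
	global_ctrl &= ~cross_mapped;	/* ...the fix also stops the counter    */
	global_ctrl |= pebs_enable;	/* PEBS counters still need GLOBAL_CTRL */

	printf("pebs_enable=%#llx global_ctrl=%#llx\n",
	       (unsigned long long)pebs_enable,
	       (unsigned long long)global_ctrl);	/* 0x1 and 0x1 */
	return 0;
}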
@@ -5361,19 +5361,6 @@ void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu)
 	__kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu);
 }
 
-static bool need_remote_flush(u64 old, u64 new)
-{
-	if (!is_shadow_present_pte(old))
-		return false;
-	if (!is_shadow_present_pte(new))
-		return true;
-	if ((old ^ new) & SPTE_BASE_ADDR_MASK)
-		return true;
-	old ^= shadow_nx_mask;
-	new ^= shadow_nx_mask;
-	return (old & ~new & SPTE_PERM_MASK) != 0;
-}
-
 static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
 				    int *bytes)
 {
@@ -5519,7 +5506,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 			mmu_page_zap_pte(vcpu->kvm, sp, spte, NULL);
 			if (gentry && sp->role.level != PG_LEVEL_4K)
 				++vcpu->kvm->stat.mmu_pde_zapped;
-			if (need_remote_flush(entry, *spte))
+			if (is_shadow_present_pte(entry))
 				flush = true;
 			++spte;
 		}
@@ -6085,47 +6072,18 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 				      const struct kvm_memory_slot *memslot,
 				      int start_level)
 {
-	bool flush = false;
-
 	if (kvm_memslots_have_rmaps(kvm)) {
 		write_lock(&kvm->mmu_lock);
-		flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
-					  start_level, KVM_MAX_HUGEPAGE_LEVEL,
-					  false);
+		slot_handle_level(kvm, memslot, slot_rmap_write_protect,
+				  start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
 		write_unlock(&kvm->mmu_lock);
 	}
 
 	if (is_tdp_mmu_enabled(kvm)) {
 		read_lock(&kvm->mmu_lock);
-		flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level);
+		kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level);
 		read_unlock(&kvm->mmu_lock);
 	}
-
-	/*
-	 * Flush TLBs if any SPTEs had to be write-protected to ensure that
-	 * guest writes are reflected in the dirty bitmap before the memslot
-	 * update completes, i.e. before enabling dirty logging is visible to
-	 * userspace.
-	 *
-	 * Perform the TLB flush outside the mmu_lock to reduce the amount of
-	 * time the lock is held. However, this does mean that another CPU can
-	 * now grab mmu_lock and encounter a write-protected SPTE while CPUs
-	 * still have a writable mapping for the associated GFN in their TLB.
-	 *
-	 * This is safe but requires KVM to be careful when making decisions
-	 * based on the write-protection status of an SPTE. Specifically, KVM
-	 * also write-protects SPTEs to monitor changes to guest page tables
-	 * during shadow paging, and must guarantee no CPUs can write to those
-	 * page before the lock is dropped. As mentioned in the previous
-	 * paragraph, a write-protected SPTE is no guarantee that CPU cannot
-	 * perform writes. So to determine if a TLB flush is truly required, KVM
-	 * will clear a separate software-only bit (MMU-writable) and skip the
-	 * flush if-and-only-if this bit was already clear.
-	 *
-	 * See is_writable_pte() for more details.
-	 */
-	if (flush)
-		kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
 }
 
 static inline bool need_topup(struct kvm_mmu_memory_cache *cache, int min)
...
@@ -6493,32 +6451,30 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 				   const struct kvm_memory_slot *memslot)
 {
-	bool flush = false;
-
 	if (kvm_memslots_have_rmaps(kvm)) {
 		write_lock(&kvm->mmu_lock);
 		/*
 		 * Clear dirty bits only on 4k SPTEs since the legacy MMU only
 		 * supports dirty logging at a 4k granularity.
 		 */
-		flush = slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false);
+		slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false);
 		write_unlock(&kvm->mmu_lock);
 	}
 
 	if (is_tdp_mmu_enabled(kvm)) {
 		read_lock(&kvm->mmu_lock);
-		flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
+		kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
 		read_unlock(&kvm->mmu_lock);
 	}
 
 	/*
+	 * The caller will flush the TLBs after this function returns.
+	 *
 	 * It's also safe to flush TLBs out of mmu lock here as currently this
 	 * function is only used for dirty logging, in which case flushing TLB
 	 * out of mmu lock also guarantees no dirty pages will be lost in
 	 * dirty_bitmap.
 	 */
-	if (flush)
-		kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
 }
 
 void kvm_mmu_zap_all(struct kvm *kvm)
...
@@ -343,7 +343,7 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check,
 }
 
 /*
- * An shadow-present leaf SPTE may be non-writable for 3 possible reasons:
+ * A shadow-present leaf SPTE may be non-writable for 4 possible reasons:
  *
  * 1. To intercept writes for dirty logging. KVM write-protects huge pages
  *    so that they can be split down into the dirty logging
@@ -361,8 +361,13 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check,
 *    read-only memslot or guest memory backed by a read-only VMA. Writes to
 *    such pages are disallowed entirely.
 *
- * To keep track of why a given SPTE is write-protected, KVM uses 2
- * software-only bits in the SPTE:
+ * 4. To emulate the Accessed bit for SPTEs without A/D bits. Note, in this
+ *    case, the SPTE is access-protected, not just write-protected!
+ *
+ * For cases #1 and #4, KVM can safely make such SPTEs writable without taking
+ * mmu_lock as capturing the Accessed/Dirty state doesn't require taking it.
+ * To differentiate #1 and #4 from #2 and #3, KVM uses two software-only bits
+ * in the SPTE:
 *
 * shadow_mmu_writable_mask, aka MMU-writable -
 *   Cleared on SPTEs that KVM is currently write-protecting for shadow paging
@@ -391,7 +396,8 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check,
 * shadow page tables between vCPUs. Write-protecting an SPTE for dirty logging
 * (which does not clear the MMU-writable bit), does not flush TLBs before
 * dropping the lock, as it only needs to synchronize guest writes with the
- * dirty bitmap.
+ * dirty bitmap. Similarly, making the SPTE inaccessible (and non-writable) for
+ * access-tracking via the clear_young() MMU notifier also does not flush TLBs.
 *
 * So, there is the problem: clearing the MMU-writable bit can encounter a
 * write-protected SPTE while CPUs still have writable mappings for that SPTE
...
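Condensed, the rule this comment block builds up to: when KVM clears the hardware Writable bit, whether a TLB flush is needed depends only on the software MMU-writable bit. A hedged sketch of that decision with illustrative bit positions (the kernel computes the real masks at runtime):

#include <stdbool.h>
#include <stdint.h>

#define SPTE_HW_WRITABLE  (1ULL << 1)	/* hardware W bit (illustrative)      */
#define SPTE_MMU_WRITABLE (1ULL << 57)	/* software-only bit (illustrative)   */

static bool clear_writable_needs_flush(uint64_t *spte)
{
	bool was_mmu_writable = *spte & SPTE_MMU_WRITABLE;

	*spte &= ~(SPTE_HW_WRITABLE | SPTE_MMU_WRITABLE);
	/* If MMU-writable was already clear, an earlier path has flushed,
	 * so no CPU can still hold a writable translation for this page. */
	return was_mmu_writable;
}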
@@ -843,8 +843,7 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
 	if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
 		return true;
 
-	return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap,
-					 MSR_IA32_SPEC_CTRL);
+	return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, msr);
 }
 
 unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
...
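The bug here is a classic ignored-parameter slip: msr_write_intercepted() took an MSR index but always tested the MSR_IA32_SPEC_CTRL bit, so it answered correctly only when a caller happened to pass that MSR. The pattern in miniature (hypothetical helpers, real MSR index):

#include <stdbool.h>
#include <stdint.h>

#define MSR_IA32_SPEC_CTRL 0x48

static bool test_bit64(const uint64_t *bitmap, uint32_t msr)
{
	return bitmap[msr / 64] & (1ULL << (msr % 64));
}

/* Before: the parameter is silently ignored. */
static bool write_intercepted_buggy(const uint64_t *bitmap, uint32_t msr)
{
	(void)msr;
	return test_bit64(bitmap, MSR_IA32_SPEC_CTRL);
}

/* After: heed the argument, as the name always promised. */
static bool write_intercepted(const uint64_t *bitmap, uint32_t msr)
{
	return test_bit64(bitmap, msr);
}

int main(void)
{
	uint64_t bitmap[128] = { 0 };

	bitmap[MSR_IA32_SPEC_CTRL / 64] |= 1ULL << (MSR_IA32_SPEC_CTRL % 64);

	/* The two helpers disagree for any MSR other than SPEC_CTRL. */
	return write_intercepted_buggy(bitmap, 0x49) ==
	       write_intercepted(bitmap, 0x49);	/* returns 0 */
}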
@@ -1557,12 +1557,32 @@ static const u32 msr_based_features_all[] = {
 static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
 static unsigned int num_msr_based_features;
 
+/*
+ * Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM
+ * does not yet virtualize. These include:
+ *   10 - MISC_PACKAGE_CTRLS
+ *   11 - ENERGY_FILTERING_CTL
+ *   12 - DOITM
+ *   18 - FB_CLEAR_CTRL
+ *   21 - XAPIC_DISABLE_STATUS
+ *   23 - OVERCLOCKING_STATUS
+ */
+#define KVM_SUPPORTED_ARCH_CAP \
+	(ARCH_CAP_RDCL_NO | ARCH_CAP_IBRS_ALL | ARCH_CAP_RSBA | \
+	 ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
+	 ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
+	 ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
+	 ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO)
+
 static u64 kvm_get_arch_capabilities(void)
 {
 	u64 data = 0;
 
-	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
 		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
+		data &= KVM_SUPPORTED_ARCH_CAP;
+	}
 
 	/*
 	 * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
@@ -1610,9 +1630,6 @@ static u64 kvm_get_arch_capabilities(void)
 	 */
 	}
 
-	/* Guests don't need to know "Fill buffer clear control" exists */
-	data &= ~ARCH_CAP_FB_CLEAR_CTRL;
-
 	return data;
 }
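With the mask applied at read time, bits the host CPU reports but KVM does not virtualize can no longer leak into the guest's view of IA32_ARCH_CAPABILITIES, and the old one-off clearing of FB_CLEAR_CTRL becomes redundant. A worked example with illustrative values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Pretend the host MSR sets RDCL_NO (bit 0) plus two bits KVM does
	 * not virtualize: FB_CLEAR_CTRL (18) and XAPIC_DISABLE_STATUS (21). */
	uint64_t host_caps = (1ULL << 0) | (1ULL << 18) | (1ULL << 21);
	uint64_t supported = (1ULL << 0);	/* stand-in for KVM_SUPPORTED_ARCH_CAP */
	uint64_t guest_caps = host_caps & supported;

	printf("guest sees %#llx\n", (unsigned long long)guest_caps);	/* 0x1 */
	return 0;
}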
@@ -10652,7 +10669,8 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu)
 	case KVM_MP_STATE_INIT_RECEIVED:
 		break;
 	default:
-		return -EINTR;
+		WARN_ON_ONCE(1);
+		break;
 	}
 	return 1;
 }
@@ -11093,9 +11111,22 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 	vcpu_load(vcpu);
 
-	if (!lapic_in_kernel(vcpu) &&
-	    mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
+	switch (mp_state->mp_state) {
+	case KVM_MP_STATE_UNINITIALIZED:
+	case KVM_MP_STATE_HALTED:
+	case KVM_MP_STATE_AP_RESET_HOLD:
+	case KVM_MP_STATE_INIT_RECEIVED:
+	case KVM_MP_STATE_SIPI_RECEIVED:
+		if (!lapic_in_kernel(vcpu))
+			goto out;
+		break;
+
+	case KVM_MP_STATE_RUNNABLE:
+		break;
+
+	default:
 		goto out;
+	}
 
 	/*
 	 * KVM_MP_STATE_INIT_RECEIVED means the processor is in
...
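From userspace, the observable change is that a nonsensical mp_state value is now refused up front instead of being stored and tripping vcpu_block() later. A hedged sketch of the now-failing call (standard KVM UAPI, hypothetical wrapper):

#include <linux/kvm.h>
#include <sys/ioctl.h>

int set_bogus_mp_state(int vcpu_fd)
{
	struct kvm_mp_state mp = { .mp_state = 0xdeadbeef };

	/* Previously accepted verbatim; with this fix the ioctl fails
	 * with EINVAL before the bogus state can reach vcpu_block(). */
	return ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
}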
@@ -11563,7 +11594,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	vcpu->arch.mci_ctl2_banks = kcalloc(KVM_MAX_MCE_BANKS, sizeof(u64),
 					    GFP_KERNEL_ACCOUNT);
 	if (!vcpu->arch.mce_banks || !vcpu->arch.mci_ctl2_banks)
-		goto fail_free_pio_data;
+		goto fail_free_mce_banks;
 	vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
 
 	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
@@ -11617,7 +11648,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 fail_free_mce_banks:
 	kfree(vcpu->arch.mce_banks);
 	kfree(vcpu->arch.mci_ctl2_banks);
-fail_free_pio_data:
 	free_page((unsigned long)vcpu->arch.pio_data);
 fail_free_lapic:
 	kvm_free_lapic(vcpu);
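The leak: when kcalloc() failed for either bank array, the old code jumped to fail_free_pio_data, skipping the kfree() calls for whichever array had been allocated. Jumping to fail_free_mce_banks handles the half-failed case because kfree(NULL) is a no-op, and dropping the fail_free_pio_data label folds the pio_data cleanup into the same path. The idiom in miniature (hypothetical names, userspace allocators):

#include <stdlib.h>

struct ctx { void *banks, *ctl2, *page; };

int ctx_init(struct ctx *c)
{
	c->page = malloc(4096);
	if (!c->page)
		return -1;

	c->banks = calloc(32, sizeof(long));
	c->ctl2  = calloc(32, sizeof(long));
	if (!c->banks || !c->ctl2)
		goto fail_free_banks;	/* not a label that skips the frees */

	return 0;

fail_free_banks:
	free(c->banks);		/* free(NULL) is a no-op, so this is safe */
	free(c->ctl2);		/* even when only one allocation failed   */
	free(c->page);
	return -1;
}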
@@ -12473,6 +12503,50 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 		} else {
 			kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K);
 		}
+
+		/*
+		 * Unconditionally flush the TLBs after enabling dirty logging.
+		 * A flush is almost always going to be necessary (see below),
+		 * and unconditionally flushing allows the helpers to omit
+		 * the subtly complex checks when removing write access.
+		 *
+		 * Do the flush outside of mmu_lock to reduce the amount of
+		 * time mmu_lock is held. Flushing after dropping mmu_lock is
+		 * safe as KVM only needs to guarantee the slot is fully
+		 * write-protected before returning to userspace, i.e. before
+		 * userspace can consume the dirty status.
+		 *
+		 * Flushing outside of mmu_lock requires KVM to be careful when
+		 * making decisions based on writable status of an SPTE, e.g. a
+		 * !writable SPTE doesn't guarantee a CPU can't perform writes.
+		 *
+		 * Specifically, KVM also write-protects guest page tables to
+		 * monitor changes when using shadow paging, and must guarantee
+		 * no CPUs can write to those pages before mmu_lock is dropped.
+		 * Because CPUs may have stale TLB entries at this point, a
+		 * !writable SPTE doesn't guarantee CPUs can't perform writes.
+		 *
+		 * KVM also allows making SPTEs writable outside of mmu_lock,
+		 * e.g. to allow dirty logging without taking mmu_lock.
+		 *
+		 * To handle these scenarios, KVM uses a separate software-only
+		 * bit (MMU-writable) to track if a SPTE is !writable due to
+		 * a guest page table being write-protected (KVM clears the
+		 * MMU-writable flag when write-protecting for shadow paging).
+		 *
+		 * The use of MMU-writable is also the primary motivation for
+		 * the unconditional flush. Because KVM must guarantee that a
+		 * CPU doesn't contain stale, writable TLB entries for a
+		 * !MMU-writable SPTE, KVM must flush if it encounters any
+		 * MMU-writable SPTE regardless of whether the actual hardware
+		 * writable bit was set. I.e. KVM is almost guaranteed to need
+		 * to flush, while unconditionally flushing allows the "remove
+		 * write access" helpers to ignore MMU-writable entirely.
+		 *
+		 * See is_writable_pte() for more details (the case involving
+		 * access-tracked SPTEs is particularly relevant).
+		 */
+		kvm_arch_flush_remote_tlbs_memslot(kvm, new);
 	}
 }
...
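For completeness, the userspace-visible contract the new comment describes: once the memslot-update ioctl that turns on dirty logging returns, the slot is write-protected and stale writable TLB entries are gone. A hedged sketch of the triggering operation (standard KVM UAPI, hypothetical wrapper):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int enable_dirty_logging(int vm_fd, void *host_mem, uint64_t size)
{
	struct kvm_userspace_memory_region region = {
		.slot = 0,
		.flags = KVM_MEM_LOG_DIRTY_PAGES,	/* turn dirty logging on */
		.guest_phys_addr = 0,
		.memory_size = size,
		.userspace_addr = (uintptr_t)host_mem,
	};

	/* By the time this ioctl returns, the unconditional flush above
	 * guarantees no vCPU still holds a stale writable translation, so
	 * every subsequent guest write shows up in KVM_GET_DIRTY_LOG. */
	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}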
@@ -151,7 +151,10 @@ int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev)
 	if (!vdev->vdev.kvm)
 		return 0;
 
-	return kvm_s390_pci_register_kvm(zdev, vdev->vdev.kvm);
+	if (zpci_kvm_hook.kvm_register)
+		return zpci_kvm_hook.kvm_register(zdev, vdev->vdev.kvm);
+
+	return -ENOENT;
 }
 
 void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev)
@@ -161,5 +164,6 @@ void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev)
 	if (!zdev || !vdev->vdev.kvm)
 		return;
 
-	kvm_s390_pci_unregister_kvm(zdev);
+	if (zpci_kvm_hook.kvm_unregister)
+		zpci_kvm_hook.kvm_unregister(zdev);
 }
@@ -754,7 +754,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
 			void (*handler)(struct ex_regs *));
 
 /* If a toddler were to say "abracadabra". */
-#define KVM_EXCEPTION_MAGIC 0xabacadabaull
+#define KVM_EXCEPTION_MAGIC 0xabacadabaULL
 
 /*
 * KVM selftest exception fixup uses registers to coordinate with the exception
@@ -786,7 +786,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
 	"lea 1f(%%rip), %%r10\n\t"					\
 	"lea 2f(%%rip), %%r11\n\t"					\
 	"1: " insn "\n\t"						\
-	"mov $0, %[vector]\n\t"						\
+	"movb $0, %[vector]\n\t"					\
 	"jmp 3f\n\t"							\
 	"2:\n\t"							\
 	"mov %%r9b, %[vector]\n\t"					\
...
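Both selftest fixes cater to Clang's integrated assembler, which is stricter than the GNU toolchain about what reaches it: an uppercase ULL suffix for the magic constant consumed by the fixup asm, and an explicit byte-sized movb where the destination operand does not itself imply a width. The operand-size ambiguity in isolation (illustrative, not the selftest macro):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t vector = 0xff;

	/* With a memory destination, plain "mov $0, %0" names no operand
	 * width at all, so the assembler would have to guess; the "b"
	 * suffix pins the store to the one byte the variable occupies,
	 * matching the byte-sized "mov %%r9b" on the failure path. */
	asm volatile("movb $0, %0" : "=m"(vector));

	printf("%u\n", vector);	/* prints 0 */
	return 0;
}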