Commit 0afe64be authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "Tools:
   - kvm_stat: do not show halt_wait_ns since it is not a cumulative statistic

  x86:
   - clean ups and fixes for bus lock vmexit and lazy allocation of rmaps
   - two fixes for SEV-ES (one more coming as soon as I get reviews)
   - fix for static_key underflow

  ARM:
   - Properly refcount pages used as a concatenated stage-2 PGD
   - Fix missing unlock when detecting the use of MTE+VM_SHARED"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: SEV-ES: reduce ghcb_sa_len to 32 bits
  KVM: VMX: Remove redundant handling of bus lock vmexit
  KVM: kvm_stat: do not show halt_wait_ns
  KVM: x86: WARN if APIC HW/SW disable static keys are non-zero on unload
  Revert "KVM: x86: Open code necessary bits of kvm_lapic_set_base() at vCPU RESET"
  KVM: SEV-ES: Set guest_state_protected after VMSA update
  KVM: X86: fix lazy allocation of rmaps
  KVM: SEV-ES: fix length of string I/O
  KVM: arm64: Release mmap_lock when using VM_SHARED with MTE
  KVM: arm64: Report corrupted refcount at EL2
  KVM: arm64: Fix host stage-2 PGD refcount
  KVM: s390: Function documentation fixes
parents d9abdee5 9f1ee7b1
@@ -24,6 +24,7 @@ struct hyp_pool {

 /* Allocation */
 void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order);
+void hyp_split_page(struct hyp_page *page);
 void hyp_get_page(struct hyp_pool *pool, void *addr);
 void hyp_put_page(struct hyp_pool *pool, void *addr);
@@ -35,7 +35,18 @@ const u8 pkvm_hyp_id = 1;

 static void *host_s2_zalloc_pages_exact(size_t size)
 {
-	return hyp_alloc_pages(&host_s2_pool, get_order(size));
+	void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));
+
+	hyp_split_page(hyp_virt_to_page(addr));
+
+	/*
+	 * The size of concatenated PGDs is always a power of two of PAGE_SIZE,
+	 * so there should be no need to free any of the tail pages to make the
+	 * allocation exact.
+	 */
+	WARN_ON(size != (PAGE_SIZE << get_order(size)));
+
+	return addr;
 }

 static void *host_s2_zalloc_page(void *pool)
@@ -152,6 +152,7 @@ static inline void hyp_page_ref_inc(struct hyp_page *p)

 static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
 {
+	BUG_ON(!p->refcount);
 	p->refcount--;
 	return (p->refcount == 0);
 }
@@ -193,6 +194,20 @@ void hyp_get_page(struct hyp_pool *pool, void *addr)
 	hyp_spin_unlock(&pool->lock);
 }

+void hyp_split_page(struct hyp_page *p)
+{
+	unsigned short order = p->order;
+	unsigned int i;
+
+	p->order = 0;
+	for (i = 1; i < (1 << order); i++) {
+		struct hyp_page *tail = p + i;
+
+		tail->order = 0;
+		hyp_set_page_refcounted(tail);
+	}
+}
+
 void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order)
 {
 	unsigned short i = order;
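
The effect of hyp_split_page() above can be illustrated with a small, self-contained sketch (plain userspace C with a simplified stand-in for struct hyp_page, not the hypervisor code itself): an order-N allocation becomes (1 << N) order-0 pages, each carrying its own refcount, so a later per-page put drops exactly one reference instead of underflowing.

#include <assert.h>
#include <stdio.h>

struct fake_page {
	unsigned short refcount;
	unsigned short order;
};

static void split_page(struct fake_page *p)
{
	unsigned short order = p->order;
	unsigned int i;

	p->order = 0;
	for (i = 1; i < (1u << order); i++) {
		p[i].order = 0;
		p[i].refcount = 1;	/* mirrors hyp_set_page_refcounted() */
	}
}

int main(void)
{
	struct fake_page pages[8] = { { .refcount = 1, .order = 3 } };
	int i;

	split_page(&pages[0]);
	for (i = 0; i < 8; i++)
		assert(pages[i].refcount == 1 && pages[i].order == 0);
	printf("all 8 constituent pages are order-0 and refcounted\n");
	return 0;
}
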
@@ -1529,8 +1529,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
		 * when updating the PG_mte_tagged page flag, see
		 * sanitise_mte_tags for more details.
		 */
-		if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED)
-			return -EINVAL;
+		if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) {
+			ret = -EINVAL;
+			break;
+		}

		if (vma->vm_flags & VM_PFNMAP) {
			/* IO region dirty page logging not allowed */
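
The arm64 MTE change above is a lock-release fix: the check runs inside a loop that is entered with the mmap_lock held and released at a single exit point, so the error path has to break out to that common exit rather than return while still holding the lock. A minimal, self-contained illustration of the pattern (a pthread mutex stands in for the mmap_lock; all names here are invented):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static int check_regions(const int *flags, int n)
{
	int ret = 0;
	int i;

	pthread_mutex_lock(&lock);
	for (i = 0; i < n; i++) {
		if (flags[i] < 0) {
			ret = -1;	/* a direct "return -1" here would leak the lock */
			break;
		}
	}
	pthread_mutex_unlock(&lock);	/* single unlock reached on every path */
	return ret;
}

int main(void)
{
	int flags[] = { 1, 2, -1, 4 };

	printf("ret=%d\n", check_regions(flags, 4));
	return 0;
}
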
@@ -894,6 +894,11 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,

 /**
  * guest_translate_address - translate guest logical into guest absolute address
+ * @vcpu: virtual cpu
+ * @gva: Guest virtual address
+ * @ar: Access register
+ * @gpa: Guest physical address
+ * @mode: Translation access mode
  *
  * Parameter semantics are the same as the ones from guest_translate.
  * The memory contents at the guest address are not changed.
@@ -934,6 +939,11 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,

 /**
  * check_gva_range - test a range of guest virtual addresses for accessibility
+ * @vcpu: virtual cpu
+ * @gva: Guest virtual address
+ * @ar: Access register
+ * @length: Length of test range
+ * @mode: Translation access mode
  */
 int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
		    unsigned long length, enum gacc_mode mode)
@@ -956,6 +966,7 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,

 /**
  * kvm_s390_check_low_addr_prot_real - check for low-address protection
+ * @vcpu: virtual cpu
  * @gra: Guest real address
  *
  * Checks whether an address is subject to low-address protection and set
@@ -979,6 +990,7 @@ int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
  * @pgt: pointer to the beginning of the page table for the given address if
  *       successful (return value 0), or to the first invalid DAT entry in
  *       case of exceptions (return value > 0)
+ * @dat_protection: referenced memory is write protected
  * @fake: pgt references contiguous guest memory block, not a pgtable
  */
 static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
@@ -269,6 +269,7 @@ static int handle_prog(struct kvm_vcpu *vcpu)

 /**
  * handle_external_interrupt - used for external interruption interceptions
+ * @vcpu: virtual cpu
  *
  * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if
  * the new PSW does not have external interrupts disabled. In the first case,
@@ -315,7 +316,8 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
 }

 /**
- * Handle MOVE PAGE partial execution interception.
+ * handle_mvpg_pei - Handle MOVE PAGE partial execution interception.
+ * @vcpu: virtual cpu
  *
  * This interception can only happen for guests with DAT disabled and
  * addresses that are currently not mapped in the host. Thus we try to
@@ -2321,13 +2321,14 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);
 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 {
	struct kvm_lapic *apic = vcpu->arch.apic;
+	u64 msr_val;
	int i;

	if (!init_event) {
-		vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE |
-				       MSR_IA32_APICBASE_ENABLE;
+		msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
		if (kvm_vcpu_is_reset_bsp(vcpu))
-			vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
+			msr_val |= MSR_IA32_APICBASE_BSP;
+		kvm_lapic_set_base(vcpu, msr_val);
	}

	if (!apic)
@@ -2336,11 +2337,9 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
	/* Stop the timer in case it's a reset to an active apic */
	hrtimer_cancel(&apic->lapic_timer.timer);

-	if (!init_event) {
-		apic->base_address = APIC_DEFAULT_PHYS_BASE;
-
-		kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
-	}
+	/* The xAPIC ID is set at RESET even if the APIC was already enabled. */
+	if (!init_event)
+		kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
	kvm_apic_set_version(apic->vcpu);

	for (i = 0; i < KVM_APIC_LVT_NUM; i++)
@@ -2481,6 +2480,11 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
		lapic_timer_advance_dynamic = false;
	}

+	/*
+	 * Stuff the APIC ENABLE bit in lieu of temporarily incrementing
+	 * apic_hw_disabled; the full RESET value is set by kvm_lapic_reset().
+	 */
+	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
	static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
@@ -2942,5 +2946,7 @@ int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 void kvm_lapic_exit(void)
 {
	static_key_deferred_flush(&apic_hw_disabled);
+	WARN_ON(static_branch_unlikely(&apic_hw_disabled.key));
	static_key_deferred_flush(&apic_sw_disabled);
+	WARN_ON(static_branch_unlikely(&apic_sw_disabled.key));
 }
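
The two WARN_ON calls above catch an unbalanced apic_hw_disabled/apic_sw_disabled count at module unload, which is how the static_key underflow mentioned in the merge description shows up. The balance requirement can be sketched with an ordinary counter standing in for a jump-label static key (illustrative names only, not the kernel API):

#include <stdio.h>

static int hw_disabled_count;

static void apic_hw_disable_inc(void) { hw_disabled_count++; }
static void apic_hw_disable_dec(void) { hw_disabled_count--; }

static void lapic_exit_check(void)
{
	/* plays the role of WARN_ON(static_branch_unlikely(&apic_hw_disabled.key)) */
	if (hw_disabled_count != 0)
		fprintf(stderr, "warning: disable count is %d at unload\n",
			hw_disabled_count);
}

int main(void)
{
	apic_hw_disable_inc();	/* vCPU created with the APIC hardware-disabled */
	apic_hw_disable_dec();	/* re-enabled before teardown */
	lapic_exit_check();	/* balanced: stays silent */
	return 0;
}
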
@@ -618,7 +618,12 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
	vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
	vmsa.address = __sme_pa(svm->vmsa);
	vmsa.len = PAGE_SIZE;
-	return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
+	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
+	if (ret)
+		return ret;
+
+	vcpu->arch.guest_state_protected = true;
+	return 0;
 }

 static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
@@ -2583,7 +2588,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
		return -EINVAL;

	return kvm_sev_es_string_io(&svm->vcpu, size, port,
-				    svm->ghcb_sa, svm->ghcb_sa_len, in);
+				    svm->ghcb_sa, svm->ghcb_sa_len / size, in);
 }

 void sev_es_init_vmcb(struct vcpu_svm *svm)
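
The string I/O change above is a unit fix: ghcb_sa_len is a byte count taken from the GHCB scratch area, while the string-I/O emulation expects the number of elements of the given size, so the length has to be divided by the element size. A self-contained sketch of the conversion (the helper below is hypothetical, not the KVM function):

#include <stdio.h>

/* Hypothetical emulation helper: "count" is a repetition count, not bytes. */
static void emulate_string_out(unsigned int port, const void *data,
			       unsigned int size, unsigned int count)
{
	(void)data;
	printf("port %#x: %u element(s) of %u byte(s)\n", port, count, size);
}

int main(void)
{
	unsigned char scratch[8] = { 0 };
	unsigned int sa_len = sizeof(scratch);	/* byte length, like ghcb_sa_len */
	unsigned int size = 2;			/* e.g. a 2-byte-per-element OUT */

	/* Passing sa_len directly would request 8 iterations instead of 4. */
	emulate_string_out(0x3f8, scratch, size, sa_len / size);
	return 0;
}
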
@@ -191,7 +191,7 @@ struct vcpu_svm {

	/* SEV-ES scratch area support */
	void *ghcb_sa;
-	u64 ghcb_sa_len;
+	u32 ghcb_sa_len;
	bool ghcb_sa_sync;
	bool ghcb_sa_free;
@@ -5562,9 +5562,13 @@ static int handle_encls(struct kvm_vcpu *vcpu)

 static int handle_bus_lock_vmexit(struct kvm_vcpu *vcpu)
 {
-	vcpu->run->exit_reason = KVM_EXIT_X86_BUS_LOCK;
-	vcpu->run->flags |= KVM_RUN_X86_BUS_LOCK;
-	return 0;
+	/*
+	 * Hardware may or may not set the BUS_LOCK_DETECTED flag on BUS_LOCK
+	 * VM-Exits. Unconditionally set the flag here and leave the handling to
+	 * vmx_handle_exit().
+	 */
+	to_vmx(vcpu)->exit_reason.bus_lock_detected = true;
+	return 1;
 }

 /*
@@ -6051,9 +6055,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
	int ret = __vmx_handle_exit(vcpu, exit_fastpath);

	/*
-	 * Even when current exit reason is handled by KVM internally, we
-	 * still need to exit to user space when bus lock detected to inform
-	 * that there is a bus lock in guest.
+	 * Exit to user space when bus lock detected to inform that there is
+	 * a bus lock in guest.
	 */
	if (to_vmx(vcpu)->exit_reason.bus_lock_detected) {
		if (ret > 0)
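
Taken together, the two VMX hunks above move all bus-lock exit plumbing into vmx_handle_exit(): the per-reason handler only records that a bus lock was detected and returns 1 to continue, and the common exit path converts the recorded flag into a single exit to user space. A simplified, self-contained sketch of that two-stage pattern (stand-in types and names, not KVM's):

#include <stdbool.h>
#include <stdio.h>

#define EXIT_BUS_LOCK 1

struct vcpu_state {
	bool bus_lock_detected;
	int user_exit_reason;		/* 0 = none */
};

static int handle_bus_lock(struct vcpu_state *v)
{
	v->bus_lock_detected = true;
	return 1;			/* 1 = keep running, 0 = exit to user space */
}

static int handle_exit(struct vcpu_state *v)
{
	int ret = handle_bus_lock(v);	/* stand-in for the per-reason dispatch */

	if (v->bus_lock_detected) {
		if (ret > 0)
			v->user_exit_reason = EXIT_BUS_LOCK;
		ret = 0;		/* force a single exit to user space */
	}
	return ret;
}

int main(void)
{
	struct vcpu_state v = { 0 };
	int ret = handle_exit(&v);

	printf("ret=%d reason=%d\n", ret, v.user_exit_reason);
	return 0;
}
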
@@ -11392,7 +11392,8 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot,
		int level = i + 1;
		int lpages = __kvm_mmu_slot_lpages(slot, npages, level);

-		WARN_ON(slot->arch.rmap[i]);
+		if (slot->arch.rmap[i])
+			continue;

		slot->arch.rmap[i] = kvcalloc(lpages, sz, GFP_KERNEL_ACCOUNT);
		if (!slot->arch.rmap[i]) {
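
With lazy rmap allocation, a level may already be populated by the time memslot_rmap_alloc() is called again, so the code above now skips populated levels instead of warning about them. A small, self-contained sketch of such idempotent per-level allocation (invented names, plain libc):

#include <stdio.h>
#include <stdlib.h>

#define NR_LEVELS 3

static int alloc_levels(void *rmap[NR_LEVELS], size_t entries)
{
	int i;

	for (i = 0; i < NR_LEVELS; i++) {
		if (rmap[i])		/* already allocated: leave it in place */
			continue;
		rmap[i] = calloc(entries, sizeof(unsigned long));
		if (!rmap[i])
			return -1;	/* caller is expected to clean up */
	}
	return 0;
}

int main(void)
{
	void *rmap[NR_LEVELS] = { 0 };

	/* Calling twice must be safe: nothing is overwritten or leaked. */
	if (alloc_levels(rmap, 16) || alloc_levels(rmap, 16))
		return 1;
	printf("levels allocated once, second call was a no-op\n");
	return 0;
}
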
@@ -742,7 +742,7 @@ class DebugfsProvider(Provider):
         The fields are all available KVM debugfs files

         """
-        exempt_list = ['halt_poll_fail_ns', 'halt_poll_success_ns']
+        exempt_list = ['halt_poll_fail_ns', 'halt_poll_success_ns', 'halt_wait_ns']
         fields = [field for field in self.walkdir(PATH_DEBUGFS_KVM)[2]
                   if field not in exempt_list]