Commit 7d41e24d authored by Paolo Bonzini's avatar Paolo Bonzini

Merge tag 'kvm-x86-misc-6.10' of https://github.com/kvm-x86/linux into HEAD

KVM x86 misc changes for 6.10:

 - Advertise the max mappable GPA in the "guest MAXPHYADDR" CPUID field, which
   is unused by hardware, so that KVM can communicate its inability to map GPAs
   that set bits 51:48 due to lack of 5-level paging.  Guest firmware is
   expected to use the information to safely remap BARs in the uppermost GPA
   space, i.e. to avoid placing a BAR at a legal, but unmappable, GPA.

 - Use vfree() instead of kvfree() for allocations that always use vcalloc()
   or __vcalloc().

 - Don't completely ignore same-value writes to immutable feature MSRs, as
   doing so results in KVM failing to reject accesses to MSRs that aren't
   supposed to exist given the vCPU model and/or KVM configuration.

 - Don't mark APICv as being inhibited due to ABSENT if APICv is disabled
   KVM-wide to avoid confusing debuggers (KVM will never bother clearing the
   ABSENT inhibit, even if userspace enables in-kernel local APIC).
parents 5a1c72e0 51937f2a
...@@ -1232,9 +1232,22 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) ...@@ -1232,9 +1232,22 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = entry->ebx = entry->ecx = 0; entry->eax = entry->ebx = entry->ecx = 0;
break; break;
case 0x80000008: { case 0x80000008: {
unsigned g_phys_as = (entry->eax >> 16) & 0xff; /*
unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U); * GuestPhysAddrSize (EAX[23:16]) is intended for software
unsigned phys_as = entry->eax & 0xff; * use.
*
* KVM's ABI is to report the effective MAXPHYADDR for the
* guest in PhysAddrSize (phys_as), and the maximum
* *addressable* GPA in GuestPhysAddrSize (g_phys_as).
*
* GuestPhysAddrSize is valid if and only if TDP is enabled,
* in which case the max GPA that can be addressed by KVM may
* be less than the max GPA that can be legally generated by
* the guest, e.g. if MAXPHYADDR>48 but the CPU doesn't
* support 5-level TDP.
*/
unsigned int virt_as = max((entry->eax >> 8) & 0xff, 48U);
unsigned int phys_as, g_phys_as;
/* /*
* If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as
...@@ -1242,16 +1255,24 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) ...@@ -1242,16 +1255,24 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
* reductions in MAXPHYADDR for memory encryption affect shadow * reductions in MAXPHYADDR for memory encryption affect shadow
* paging, too. * paging, too.
* *
* If TDP is enabled but an explicit guest MAXPHYADDR is not * If TDP is enabled, use the raw bare metal MAXPHYADDR as
* provided, use the raw bare metal MAXPHYADDR as reductions to * reductions to the HPAs do not affect GPAs. The max
* the HPAs do not affect GPAs. * addressable GPA is the same as the max effective GPA, except
* that it's capped at 48 bits if 5-level TDP isn't supported
* (hardware processes bits 51:48 only when walking the fifth
* level page table).
*/ */
if (!tdp_enabled) if (!tdp_enabled) {
g_phys_as = boot_cpu_data.x86_phys_bits; phys_as = boot_cpu_data.x86_phys_bits;
else if (!g_phys_as) g_phys_as = 0;
} else {
phys_as = entry->eax & 0xff;
g_phys_as = phys_as; g_phys_as = phys_as;
if (kvm_mmu_get_max_tdp_level() < 5)
g_phys_as = min(g_phys_as, 48);
}
entry->eax = g_phys_as | (virt_as << 8); entry->eax = phys_as | (virt_as << 8) | (g_phys_as << 16);
entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8)); entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
entry->edx = 0; entry->edx = 0;
cpuid_entry_override(entry, CPUID_8000_0008_EBX); cpuid_entry_override(entry, CPUID_8000_0008_EBX);
......
...@@ -100,6 +100,8 @@ static inline u8 kvm_get_shadow_phys_bits(void) ...@@ -100,6 +100,8 @@ static inline u8 kvm_get_shadow_phys_bits(void)
return boot_cpu_data.x86_phys_bits; return boot_cpu_data.x86_phys_bits;
} }
u8 kvm_mmu_get_max_tdp_level(void);
void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask); void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask); void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask);
void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only); void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
......
...@@ -5316,6 +5316,11 @@ static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu) ...@@ -5316,6 +5316,11 @@ static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
return max_tdp_level; return max_tdp_level;
} }
/*
 * Report the maximum TDP page-table level KVM can use: an explicitly
 * forced root level (tdp_root_level) takes precedence; otherwise fall
 * back to the hardware/module-derived maximum (max_tdp_level).
 */
u8 kvm_mmu_get_max_tdp_level(void)
{
	if (tdp_root_level)
		return tdp_root_level;

	return max_tdp_level;
}
static union kvm_mmu_page_role static union kvm_mmu_page_role
kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu,
union kvm_cpu_role cpu_role) union kvm_cpu_role cpu_role)
......
...@@ -41,7 +41,7 @@ bool kvm_page_track_write_tracking_enabled(struct kvm *kvm) ...@@ -41,7 +41,7 @@ bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot) void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
{ {
kvfree(slot->arch.gfn_write_track); vfree(slot->arch.gfn_write_track);
slot->arch.gfn_write_track = NULL; slot->arch.gfn_write_track = NULL;
} }
......
...@@ -1074,7 +1074,7 @@ TRACE_EVENT(kvm_smm_transition, ...@@ -1074,7 +1074,7 @@ TRACE_EVENT(kvm_smm_transition,
); );
/* /*
* Tracepoint for VT-d posted-interrupts. * Tracepoint for VT-d posted-interrupts and AMD-Vi Guest Virtual APIC.
*/ */
TRACE_EVENT(kvm_pi_irte_update, TRACE_EVENT(kvm_pi_irte_update,
TP_PROTO(unsigned int host_irq, unsigned int vcpu_id, TP_PROTO(unsigned int host_irq, unsigned int vcpu_id,
...@@ -1100,7 +1100,7 @@ TRACE_EVENT(kvm_pi_irte_update, ...@@ -1100,7 +1100,7 @@ TRACE_EVENT(kvm_pi_irte_update,
__entry->set = set; __entry->set = set;
), ),
TP_printk("VT-d PI is %s for irq %u, vcpu %u, gsi: 0x%x, " TP_printk("PI is %s for irq %u, vcpu %u, gsi: 0x%x, "
"gvec: 0x%x, pi_desc_addr: 0x%llx", "gvec: 0x%x, pi_desc_addr: 0x%llx",
__entry->set ? "enabled and being updated" : "disabled", __entry->set ? "enabled and being updated" : "disabled",
__entry->host_irq, __entry->host_irq,
......
...@@ -2233,16 +2233,13 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) ...@@ -2233,16 +2233,13 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
/* /*
* Disallow writes to immutable feature MSRs after KVM_RUN. KVM does * Disallow writes to immutable feature MSRs after KVM_RUN. KVM does
* not support modifying the guest vCPU model on the fly, e.g. changing * not support modifying the guest vCPU model on the fly, e.g. changing
* the nVMX capabilities while L2 is running is nonsensical. Ignore * the nVMX capabilities while L2 is running is nonsensical. Allow
* writes of the same value, e.g. to allow userspace to blindly stuff * writes of the same value, e.g. to allow userspace to blindly stuff
* all MSRs when emulating RESET. * all MSRs when emulating RESET.
*/ */
if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index)) { if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index) &&
if (do_get_msr(vcpu, index, &val) || *data != val) (do_get_msr(vcpu, index, &val) || *data != val))
return -EINVAL; return -EINVAL;
return 0;
}
return kvm_set_msr_ignored_check(vcpu, index, *data, true); return kvm_set_msr_ignored_check(vcpu, index, *data, true);
} }
...@@ -10031,15 +10028,12 @@ static void set_or_clear_apicv_inhibit(unsigned long *inhibits, ...@@ -10031,15 +10028,12 @@ static void set_or_clear_apicv_inhibit(unsigned long *inhibits,
static void kvm_apicv_init(struct kvm *kvm) static void kvm_apicv_init(struct kvm *kvm)
{ {
unsigned long *inhibits = &kvm->arch.apicv_inhibit_reasons; enum kvm_apicv_inhibit reason = enable_apicv ? APICV_INHIBIT_REASON_ABSENT :
APICV_INHIBIT_REASON_DISABLE;
init_rwsem(&kvm->arch.apicv_update_lock); set_or_clear_apicv_inhibit(&kvm->arch.apicv_inhibit_reasons, reason, true);
set_or_clear_apicv_inhibit(inhibits, APICV_INHIBIT_REASON_ABSENT, true);
if (!enable_apicv) init_rwsem(&kvm->arch.apicv_update_lock);
set_or_clear_apicv_inhibit(inhibits,
APICV_INHIBIT_REASON_DISABLE, true);
} }
static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id) static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
...@@ -12805,7 +12799,7 @@ static void memslot_rmap_free(struct kvm_memory_slot *slot) ...@@ -12805,7 +12799,7 @@ static void memslot_rmap_free(struct kvm_memory_slot *slot)
int i; int i;
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
kvfree(slot->arch.rmap[i]); vfree(slot->arch.rmap[i]);
slot->arch.rmap[i] = NULL; slot->arch.rmap[i] = NULL;
} }
} }
...@@ -12817,7 +12811,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) ...@@ -12817,7 +12811,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
memslot_rmap_free(slot); memslot_rmap_free(slot);
for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) { for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
kvfree(slot->arch.lpage_info[i - 1]); vfree(slot->arch.lpage_info[i - 1]);
slot->arch.lpage_info[i - 1] = NULL; slot->arch.lpage_info[i - 1] = NULL;
} }
...@@ -12909,7 +12903,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm, ...@@ -12909,7 +12903,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm,
memslot_rmap_free(slot); memslot_rmap_free(slot);
for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) { for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
kvfree(slot->arch.lpage_info[i - 1]); vfree(slot->arch.lpage_info[i - 1]);
slot->arch.lpage_info[i - 1] = NULL; slot->arch.lpage_info[i - 1] = NULL;
} }
return -ENOMEM; return -ENOMEM;
......
...@@ -974,7 +974,7 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) ...@@ -974,7 +974,7 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
if (!memslot->dirty_bitmap) if (!memslot->dirty_bitmap)
return; return;
kvfree(memslot->dirty_bitmap); vfree(memslot->dirty_bitmap);
memslot->dirty_bitmap = NULL; memslot->dirty_bitmap = NULL;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment