Commit f292dc8a authored by Paolo Bonzini's avatar Paolo Bonzini

Merge tag 'kvm-x86-misc-6.7' of https://github.com/kvm-x86/linux into HEAD

KVM x86 misc changes for 6.7:

 - Add CONFIG_KVM_MAX_NR_VCPUS to allow supporting up to 4096 vCPUs without
   forcing more common use cases to eat the extra memory overhead.

 - Add IBPB and SBPB virtualization support.

 - Fix a bug where restoring a vCPU snapshot that was taken within 1 second of
   creating the original vCPU would cause KVM to try to synchronize the vCPU's
   TSC and thus clobber the correct TSC being set by userspace.

 - Compute guest wall clock using a single TSC read to avoid generating an
   inaccurate time, e.g. if the vCPU is preempted between multiple TSC reads.

 - "Virtualize" HWCR.TscFreqSel to make Linux guests happy, which complain
    about a "Firmware Bug" if the bit isn't set for select F/M/S combos.

 - Don't apply side effects to Hyper-V's synthetic timer on writes from
   userspace to fix an issue where the auto-enable behavior can trigger
   spurious interrupts, i.e. do auto-enabling only for guest writes.

 - Remove an unnecessary kick of all vCPUs when synchronizing the dirty log
   without PML enabled.

 - Advertise "support" for non-serializing FS/GS base MSR writes as appropriate.

 - Use octal notation for file permissions through KVM x86.

 - Fix a handful of typo fixes and warts.
parents fadaf574 2770d472
......@@ -443,6 +443,7 @@
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */
#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* "" WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */
#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */
#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */
......
......@@ -39,7 +39,15 @@
#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
/*
* CONFIG_KVM_MAX_NR_VCPUS is defined iff CONFIG_KVM!=n, provide a dummy max if
* KVM is disabled (arbitrarily use the default from CONFIG_KVM_MAX_NR_VCPUS).
*/
#ifdef CONFIG_KVM_MAX_NR_VCPUS
#define KVM_MAX_VCPUS CONFIG_KVM_MAX_NR_VCPUS
#else
#define KVM_MAX_VCPUS 1024
#endif
/*
* In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs
......@@ -1275,7 +1283,6 @@ struct kvm_arch {
*/
spinlock_t mmu_unsync_pages_lock;
struct list_head assigned_dev_head;
struct iommu_domain *iommu_domain;
bool iommu_noncoherent;
#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
......@@ -1323,6 +1330,7 @@ struct kvm_arch {
int nr_vcpus_matched_tsc;
u32 default_tsc_khz;
bool user_set_tsc;
seqcount_raw_spinlock_t pvclock_sc;
bool use_master_clock;
......@@ -1691,7 +1699,7 @@ struct kvm_x86_ops {
void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
void (*sched_in)(struct kvm_vcpu *vcpu, int cpu);
/*
* Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A zero
......
......@@ -553,6 +553,7 @@
#define MSR_AMD64_CPUID_FN_1 0xc0011004
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_TW_CFG 0xc0011023
#define MSR_AMD64_DE_CFG 0xc0011029
#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1
......
......@@ -154,4 +154,15 @@ config KVM_PROVE_MMU
config KVM_EXTERNAL_WRITE_TRACKING
bool
config KVM_MAX_NR_VCPUS
int "Maximum number of vCPUs per KVM guest"
depends on KVM
range 1024 4096
default 4096 if MAXSMP
default 1024
help
Set the maximum number of vCPUs per KVM guest. Larger values will increase
the memory footprint of each KVM guest, regardless of how many vCPUs are
created for a given VM.
endif # VIRTUALIZATION
......@@ -753,11 +753,13 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_mask(CPUID_8000_0021_EAX,
F(NO_NESTED_DATA_BP) | F(LFENCE_RDTSC) | 0 /* SmmPgCfgLock */ |
F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */
F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */ |
F(WRMSR_XX_BASE_NS)
);
if (cpu_feature_enabled(X86_FEATURE_SRSO_NO))
kvm_cpu_cap_set(X86_FEATURE_SRSO_NO);
kvm_cpu_cap_check_and_set(X86_FEATURE_SBPB);
kvm_cpu_cap_check_and_set(X86_FEATURE_IBPB_BRTYPE);
kvm_cpu_cap_check_and_set(X86_FEATURE_SRSO_NO);
kvm_cpu_cap_init_kvm_defined(CPUID_8000_0022_EAX,
F(PERFMON_V2)
......
......@@ -174,7 +174,8 @@ static inline bool guest_has_spec_ctrl_msr(struct kvm_vcpu *vcpu)
static inline bool guest_has_pred_cmd_msr(struct kvm_vcpu *vcpu)
{
return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB));
guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB) ||
guest_cpuid_has(vcpu, X86_FEATURE_SBPB));
}
static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu)
......
......@@ -727,10 +727,12 @@ static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
stimer_cleanup(stimer);
stimer->count = count;
if (stimer->count == 0)
stimer->config.enable = 0;
else if (stimer->config.auto_enable)
stimer->config.enable = 1;
if (!host) {
if (stimer->count == 0)
stimer->config.enable = 0;
else if (stimer->config.auto_enable)
stimer->config.enable = 1;
}
if (stimer->config.enable)
stimer_mark_pending(stimer, false);
......
......@@ -324,7 +324,6 @@ void enter_smm(struct kvm_vcpu *vcpu)
cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
static_call(kvm_x86_set_cr0)(vcpu, cr0);
vcpu->arch.cr0 = cr0;
static_call(kvm_x86_set_cr4)(vcpu, 0);
......
......@@ -199,7 +199,7 @@ module_param_named(npt, npt_enabled, bool, 0444);
/* allow nested virtualization in KVM/SVM */
static int nested = true;
module_param(nested, int, S_IRUGO);
module_param(nested, int, 0444);
/* enable/disable Next RIP Save */
int nrips = true;
......
......@@ -82,28 +82,28 @@ bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);
static bool __read_mostly enable_vnmi = 1;
module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
module_param_named(vnmi, enable_vnmi, bool, 0444);
bool __read_mostly flexpriority_enabled = 1;
module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
module_param_named(flexpriority, flexpriority_enabled, bool, 0444);
bool __read_mostly enable_ept = 1;
module_param_named(ept, enable_ept, bool, S_IRUGO);
module_param_named(ept, enable_ept, bool, 0444);
bool __read_mostly enable_unrestricted_guest = 1;
module_param_named(unrestricted_guest,
enable_unrestricted_guest, bool, S_IRUGO);
enable_unrestricted_guest, bool, 0444);
bool __read_mostly enable_ept_ad_bits = 1;
module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
module_param_named(eptad, enable_ept_ad_bits, bool, 0444);
static bool __read_mostly emulate_invalid_guest_state = true;
module_param(emulate_invalid_guest_state, bool, S_IRUGO);
module_param(emulate_invalid_guest_state, bool, 0444);
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
module_param(fasteoi, bool, 0444);
module_param(enable_apicv, bool, S_IRUGO);
module_param(enable_apicv, bool, 0444);
bool __read_mostly enable_ipiv = true;
module_param(enable_ipiv, bool, 0444);
......@@ -114,10 +114,10 @@ module_param(enable_ipiv, bool, 0444);
* use VMX instructions.
*/
static bool __read_mostly nested = 1;
module_param(nested, bool, S_IRUGO);
module_param(nested, bool, 0444);
bool __read_mostly enable_pml = 1;
module_param_named(pml, enable_pml, bool, S_IRUGO);
module_param_named(pml, enable_pml, bool, 0444);
static bool __read_mostly error_on_inconsistent_vmcs_config = true;
module_param(error_on_inconsistent_vmcs_config, bool, 0444);
......
This diff is collapsed.
......@@ -293,6 +293,7 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
u64 get_kvmclock_ns(struct kvm *kvm);
uint64_t kvm_get_wall_clock_epoch(struct kvm *kvm);
int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
gva_t addr, void *val, unsigned int bytes,
......
......@@ -59,7 +59,7 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
* This code mirrors kvm_write_wall_clock() except that it writes
* directly through the pfn cache and doesn't mark the page dirty.
*/
wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);
wall_nsec = kvm_get_wall_clock_epoch(kvm);
/* It could be invalid again already, so we need to check */
read_lock_irq(&gpc->lock);
......@@ -98,7 +98,7 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
wc_version = wc->version = (wc->version + 1) | 1;
smp_wmb();
wc->nsec = do_div(wall_nsec, 1000000000);
wc->nsec = do_div(wall_nsec, NSEC_PER_SEC);
wc->sec = (u32)wall_nsec;
*wc_sec_hi = wall_nsec >> 32;
smp_wmb();
......
......@@ -66,6 +66,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test
TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test
TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2023, Google LLC.
*/
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <sys/ioctl.h>
#include "test_util.h"
#include "kvm_util.h"
#include "vmx.h"
void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit)
{
const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8);
const uint64_t valid = BIT_ULL(18) | BIT_ULL(24);
const uint64_t legal = ignored | valid;
uint64_t val = BIT_ULL(bit);
uint64_t actual;
int r;
r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val);
TEST_ASSERT(val & ~legal ? !r : r == 1,
"Expected KVM_SET_MSRS(MSR_K7_HWCR) = 0x%lx to %s",
val, val & ~legal ? "fail" : "succeed");
actual = vcpu_get_msr(vcpu, MSR_K7_HWCR);
TEST_ASSERT(actual == (val & valid),
"Bit %u: unexpected HWCR 0x%lx; expected 0x%lx",
bit, actual, (val & valid));
vcpu_set_msr(vcpu, MSR_K7_HWCR, 0);
}
int main(int argc, char *argv[])
{
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
unsigned int bit;
vm = vm_create_with_one_vcpu(&vcpu, NULL);
for (bit = 0; bit < BITS_PER_LONG; bit++)
test_hwcr_bit(vcpu, bit);
kvm_vm_free(vm);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment