Commit 4992eb41 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:

 -  x86 bugfixes

 - Documentation fixes

 - Avoid performance regression due to SEV-ES patches

 - ARM:
     - Don't allow tagged pointers to point to memslots
     - Filter out ARMv8.1+ PMU events on v8.0 hardware
     - Hide PMU registers from userspace when no PMU is configured
     - More PMU cleanups
     - Don't try to handle broken PSCI firmware
     - More sys_reg() to reg_to_encoding() conversions

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: allow KVM_REQ_GET_NESTED_STATE_PAGES outside guest mode for VMX
  KVM: x86: Revert "KVM: x86: Mark GPRs dirty when written"
  KVM: SVM: Unconditionally sync GPRs to GHCB on VMRUN of SEV-ES guest
  KVM: nVMX: Sync unsync'd vmcs02 state to vmcs12 on migration
  kvm: tracing: Fix unmatched kvm_entry and kvm_exit events
  KVM: Documentation: Update description of KVM_{GET,CLEAR}_DIRTY_LOG
  KVM: x86: get smi pending status correctly
  KVM: x86/pmu: Fix HW_REF_CPU_CYCLES event pseudo-encoding in intel_arch_events[]
  KVM: x86/pmu: Fix UBSAN shift-out-of-bounds warning in intel_pmu_refresh()
  KVM: x86: Add more protection against undefined behavior in rsvd_bits()
  KVM: Documentation: Fix spec for KVM_CAP_ENABLE_CAP_VM
  KVM: Forbid the use of tagged userspace addresses for memslots
  KVM: arm64: Filter out v8.1+ events on v8.0 HW
  KVM: arm64: Compute TPIDR_EL2 ignoring MTE tag
  KVM: arm64: Use the reg_to_encoding() macro instead of sys_reg()
  KVM: arm64: Allow PSCI SYSTEM_OFF/RESET to return
  KVM: arm64: Simplify handling of absent PMU system registers
  KVM: arm64: Hide PMU registers from userspace when not available
parents c7230a48 9a78e158
......@@ -360,10 +360,9 @@ since the last call to this ioctl. Bit 0 is the first page in the
memory slot. Ensure the entire structure is cleared to avoid padding
issues.
If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specifies
the address space for which you want to return the dirty bitmap.
They must be less than the value that KVM_CHECK_EXTENSION returns for
the KVM_CAP_MULTI_ADDRESS_SPACE capability.
If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of slot field specifies
the address space for which you want to return the dirty bitmap. See
KVM_SET_USER_MEMORY_REGION for details on the usage of slot field.
The bits in the dirty bitmap are cleared before the ioctl returns, unless
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled. For more information,
......@@ -1281,6 +1280,9 @@ field userspace_addr, which must point at user addressable memory for
the entire memory slot size. Any object may back this memory, including
anonymous memory, ordinary files, and hugetlbfs.
On architectures that support a form of address tagging, userspace_addr must
be an untagged address.
It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
be identical. This allows large pages in the guest to be backed by large
pages in the host.
......@@ -1333,7 +1335,7 @@ documentation when it pops into existence).
:Capability: KVM_CAP_ENABLE_CAP_VM
:Architectures: all
:Type: vcpu ioctl
:Type: vm ioctl
:Parameters: struct kvm_enable_cap (in)
:Returns: 0 on success; -1 on error
......@@ -4432,7 +4434,7 @@ to I/O ports.
:Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
:Architectures: x86, arm, arm64, mips
:Type: vm ioctl
:Parameters: struct kvm_dirty_log (in)
:Parameters: struct kvm_clear_dirty_log (in)
:Returns: 0 on success, -1 on error
::
......@@ -4459,10 +4461,9 @@ in KVM's dirty bitmap, and dirty tracking is re-enabled for that page
(for example via write-protection, or by clearing the dirty bit in
a page table entry).
If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specifies
the address space for which you want to return the dirty bitmap.
They must be less than the value that KVM_CHECK_EXTENSION returns for
the KVM_CAP_MULTI_ADDRESS_SPACE capability.
If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of slot field specifies
the address space for which you want to clear the dirty status. See
KVM_SET_USER_MEMORY_REGION for details on the usage of slot field.
This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
is enabled; for more information, see the description of the capability.
......
......@@ -1396,8 +1396,9 @@ static void cpu_init_hyp_mode(void)
* Calculate the raw per-cpu offset without a translation from the
* kernel's mapping to the linear mapping, and store it in tpidr_el2
* so that we can use adr_l to access per-cpu variables in EL2.
* Also drop the KASAN tag which gets in the way...
*/
params->tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) -
params->tpidr_el2 = (unsigned long)kasan_reset_tag(this_cpu_ptr_nvhe_sym(__per_cpu_start)) -
(unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start));
params->mair_el2 = read_sysreg(mair_el1);
......
......@@ -77,12 +77,6 @@ static unsigned long psci_forward(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 2), cpu_reg(host_ctxt, 3));
}
static __noreturn unsigned long psci_forward_noreturn(struct kvm_cpu_context *host_ctxt)
{
psci_forward(host_ctxt);
hyp_panic(); /* unreachable */
}
static unsigned int find_cpu_id(u64 mpidr)
{
unsigned int i;
......@@ -251,10 +245,13 @@ static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_
case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
return psci_forward(host_ctxt);
/*
* SYSTEM_OFF/RESET should not return according to the spec.
* Allow it so as to stay robust to broken firmware.
*/
case PSCI_0_2_FN_SYSTEM_OFF:
case PSCI_0_2_FN_SYSTEM_RESET:
psci_forward_noreturn(host_ctxt);
unreachable();
return psci_forward(host_ctxt);
case PSCI_0_2_FN64_CPU_SUSPEND:
return psci_cpu_suspend(func_id, host_ctxt);
case PSCI_0_2_FN64_CPU_ON:
......
......@@ -788,7 +788,7 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
u64 val, mask = 0;
int base, i;
int base, i, nr_events;
if (!pmceid1) {
val = read_sysreg(pmceid0_el0);
......@@ -801,14 +801,18 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
if (!bmap)
return val;
nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
for (i = 0; i < 32; i += 8) {
u64 byte;
byte = bitmap_get_value8(bmap, base + i);
mask |= byte << i;
if (nr_events >= (0x4000 + base + 32)) {
byte = bitmap_get_value8(bmap, 0x4000 + base + i);
mask |= byte << (32 + i);
}
}
return val & mask;
}
......
......@@ -43,6 +43,10 @@
* 64bit interface.
*/
#define reg_to_encoding(x) \
sys_reg((u32)(x)->Op0, (u32)(x)->Op1, \
(u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2)
static bool read_from_write_only(struct kvm_vcpu *vcpu,
struct sys_reg_params *params,
const struct sys_reg_desc *r)
......@@ -273,8 +277,7 @@ static bool trap_loregion(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
u64 val = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
u32 sr = sys_reg((u32)r->Op0, (u32)r->Op1,
(u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
u32 sr = reg_to_encoding(r);
if (!(val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT))) {
kvm_inject_undefined(vcpu);
......@@ -590,6 +593,15 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
vcpu_write_sys_reg(vcpu, (1ULL << 31) | mpidr, MPIDR_EL1);
}
static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
if (kvm_vcpu_has_pmu(vcpu))
return 0;
return REG_HIDDEN;
}
static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 pmcr, val;
......@@ -613,9 +625,8 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags)
{
u64 reg = __vcpu_sys_reg(vcpu, PMUSERENR_EL0);
bool enabled = kvm_vcpu_has_pmu(vcpu);
bool enabled = (reg & flags) || vcpu_mode_priv(vcpu);
enabled &= (reg & flags) || vcpu_mode_priv(vcpu);
if (!enabled)
kvm_inject_undefined(vcpu);
......@@ -900,11 +911,6 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (!kvm_vcpu_has_pmu(vcpu)) {
kvm_inject_undefined(vcpu);
return false;
}
if (p->is_write) {
if (!vcpu_mode_priv(vcpu)) {
kvm_inject_undefined(vcpu);
......@@ -921,10 +927,6 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return true;
}
#define reg_to_encoding(x) \
sys_reg((u32)(x)->Op0, (u32)(x)->Op1, \
(u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2)
/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
#define DBG_BCR_BVR_WCR_WVR_EL1(n) \
{ SYS_DESC(SYS_DBGBVRn_EL1(n)), \
......@@ -936,15 +938,18 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{ SYS_DESC(SYS_DBGWCRn_EL1(n)), \
trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr }
#define PMU_SYS_REG(r) \
SYS_DESC(r), .reset = reset_unknown, .visibility = pmu_visibility
/* Macro to expand the PMEVCNTRn_EL0 register */
#define PMU_PMEVCNTR_EL0(n) \
{ SYS_DESC(SYS_PMEVCNTRn_EL0(n)), \
access_pmu_evcntr, reset_unknown, (PMEVCNTR0_EL0 + n), }
{ PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)), \
.access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), }
/* Macro to expand the PMEVTYPERn_EL0 register */
#define PMU_PMEVTYPER_EL0(n) \
{ SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \
access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), }
{ PMU_SYS_REG(SYS_PMEVTYPERn_EL0(n)), \
.access = access_pmu_evtyper, .reg = (PMEVTYPER0_EL0 + n), }
static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
......@@ -1020,8 +1025,7 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
static u64 read_id_reg(const struct kvm_vcpu *vcpu,
struct sys_reg_desc const *r, bool raz)
{
u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
(u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
u32 id = reg_to_encoding(r);
u64 val = raz ? 0 : read_sanitised_ftr_reg(id);
if (id == SYS_ID_AA64PFR0_EL1) {
......@@ -1062,8 +1066,7 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
static unsigned int id_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
(u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
u32 id = reg_to_encoding(r);
switch (id) {
case SYS_ID_AA64ZFR0_EL1:
......@@ -1486,8 +1489,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
{ SYS_DESC(SYS_PMINTENSET_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 },
{ SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 },
{ PMU_SYS_REG(SYS_PMINTENSET_EL1),
.access = access_pminten, .reg = PMINTENSET_EL1 },
{ PMU_SYS_REG(SYS_PMINTENCLR_EL1),
.access = access_pminten, .reg = PMINTENSET_EL1 },
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
......@@ -1526,23 +1531,36 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
{ SYS_DESC(SYS_CTR_EL0), access_ctr },
{ SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, PMCR_EL0 },
{ SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 },
{ SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 },
{ SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, reset_unknown, PMOVSSET_EL0 },
{ SYS_DESC(SYS_PMSWINC_EL0), access_pmswinc, reset_unknown, PMSWINC_EL0 },
{ SYS_DESC(SYS_PMSELR_EL0), access_pmselr, reset_unknown, PMSELR_EL0 },
{ SYS_DESC(SYS_PMCEID0_EL0), access_pmceid },
{ SYS_DESC(SYS_PMCEID1_EL0), access_pmceid },
{ SYS_DESC(SYS_PMCCNTR_EL0), access_pmu_evcntr, reset_unknown, PMCCNTR_EL0 },
{ SYS_DESC(SYS_PMXEVTYPER_EL0), access_pmu_evtyper },
{ SYS_DESC(SYS_PMXEVCNTR_EL0), access_pmu_evcntr },
{ PMU_SYS_REG(SYS_PMCR_EL0), .access = access_pmcr,
.reset = reset_pmcr, .reg = PMCR_EL0 },
{ PMU_SYS_REG(SYS_PMCNTENSET_EL0),
.access = access_pmcnten, .reg = PMCNTENSET_EL0 },
{ PMU_SYS_REG(SYS_PMCNTENCLR_EL0),
.access = access_pmcnten, .reg = PMCNTENSET_EL0 },
{ PMU_SYS_REG(SYS_PMOVSCLR_EL0),
.access = access_pmovs, .reg = PMOVSSET_EL0 },
{ PMU_SYS_REG(SYS_PMSWINC_EL0),
.access = access_pmswinc, .reg = PMSWINC_EL0 },
{ PMU_SYS_REG(SYS_PMSELR_EL0),
.access = access_pmselr, .reg = PMSELR_EL0 },
{ PMU_SYS_REG(SYS_PMCEID0_EL0),
.access = access_pmceid, .reset = NULL },
{ PMU_SYS_REG(SYS_PMCEID1_EL0),
.access = access_pmceid, .reset = NULL },
{ PMU_SYS_REG(SYS_PMCCNTR_EL0),
.access = access_pmu_evcntr, .reg = PMCCNTR_EL0 },
{ PMU_SYS_REG(SYS_PMXEVTYPER_EL0),
.access = access_pmu_evtyper, .reset = NULL },
{ PMU_SYS_REG(SYS_PMXEVCNTR_EL0),
.access = access_pmu_evcntr, .reset = NULL },
/*
* PMUSERENR_EL0 resets as unknown in 64bit mode while it resets as zero
* in 32bit mode. Here we choose to reset it as zero for consistency.
*/
{ SYS_DESC(SYS_PMUSERENR_EL0), access_pmuserenr, reset_val, PMUSERENR_EL0, 0 },
{ SYS_DESC(SYS_PMOVSSET_EL0), access_pmovs, reset_unknown, PMOVSSET_EL0 },
{ PMU_SYS_REG(SYS_PMUSERENR_EL0), .access = access_pmuserenr,
.reset = reset_val, .reg = PMUSERENR_EL0, .val = 0 },
{ PMU_SYS_REG(SYS_PMOVSSET_EL0),
.access = access_pmovs, .reg = PMOVSSET_EL0 },
{ SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 },
{ SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 },
......@@ -1694,7 +1712,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
* PMCCFILTR_EL0 resets as unknown in 64bit mode while it resets as zero
* in 32bit mode. Here we choose to reset it as zero for consistency.
*/
{ SYS_DESC(SYS_PMCCFILTR_EL0), access_pmu_evtyper, reset_val, PMCCFILTR_EL0, 0 },
{ PMU_SYS_REG(SYS_PMCCFILTR_EL0), .access = access_pmu_evtyper,
.reset = reset_val, .reg = PMCCFILTR_EL0, .val = 0 },
{ SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 },
{ SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 },
......
......@@ -9,31 +9,6 @@
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE)
static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
}
static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
}
static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
}
static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
__set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
}
#define BUILD_KVM_GPR_ACCESSORS(lname, uname) \
static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
{ \
......@@ -43,7 +18,6 @@ static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu, \
unsigned long val) \
{ \
vcpu->arch.regs[VCPU_REGS_##uname] = val; \
kvm_register_mark_dirty(vcpu, VCPU_REGS_##uname); \
}
BUILD_KVM_GPR_ACCESSORS(rax, RAX)
BUILD_KVM_GPR_ACCESSORS(rbx, RBX)
......@@ -63,6 +37,31 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14)
BUILD_KVM_GPR_ACCESSORS(r15, R15)
#endif
static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
}
static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
}
static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
}
static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
__set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
}
static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg)
{
if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
......
......@@ -44,8 +44,15 @@
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3
static inline u64 rsvd_bits(int s, int e)
static __always_inline u64 rsvd_bits(int s, int e)
{
BUILD_BUG_ON(__builtin_constant_p(e) && __builtin_constant_p(s) && e < s);
if (__builtin_constant_p(e))
BUILD_BUG_ON(e > 63);
else
e &= 63;
if (e < s)
return 0;
......
......@@ -200,6 +200,9 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (WARN_ON(!is_guest_mode(vcpu)))
return true;
if (!nested_svm_vmrun_msrpm(svm)) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror =
......
......@@ -1415,15 +1415,12 @@ static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
* to be returned:
* GPRs RAX, RBX, RCX, RDX
*
* Copy their values to the GHCB if they are dirty.
* Copy their values, even if they may not have been written during the
* VM-Exit. It's the guest's responsibility to not consume random data.
*/
if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX))
ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
if (kvm_register_is_dirty(vcpu, VCPU_REGS_RBX))
ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
if (kvm_register_is_dirty(vcpu, VCPU_REGS_RCX))
ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
if (kvm_register_is_dirty(vcpu, VCPU_REGS_RDX))
ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
}
......
......@@ -3739,6 +3739,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
trace_kvm_entry(vcpu);
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
......
......@@ -3124,13 +3124,9 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
return 0;
}
static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_host_map *map;
struct page *page;
u64 hpa;
/*
* hv_evmcs may end up being not mapped after migration (when
......@@ -3153,6 +3149,17 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
}
}
return true;
}
static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_host_map *map;
struct page *page;
u64 hpa;
if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
/*
* Translate L1 physical address to host physical
......@@ -3221,6 +3228,18 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
else
exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
return true;
}
static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
{
if (!nested_get_evmcs_page(vcpu))
return false;
if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu))
return false;
return true;
}
......@@ -6077,12 +6096,15 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
if (is_guest_mode(vcpu)) {
sync_vmcs02_to_vmcs12(vcpu, vmcs12);
sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
} else if (!vmx->nested.need_vmcs12_to_shadow_sync) {
} else {
copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
if (!vmx->nested.need_vmcs12_to_shadow_sync) {
if (vmx->nested.hv_evmcs)
copy_enlightened_to_vmcs12(vmx);
else if (enable_shadow_vmcs)
copy_shadow_to_vmcs12(vmx);
}
}
BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
......@@ -6602,7 +6624,7 @@ struct kvm_x86_nested_ops vmx_nested_ops = {
.hv_timer_pending = nested_vmx_preemption_timer_pending,
.get_state = vmx_get_nested_state,
.set_state = vmx_set_nested_state,
.get_nested_state_pages = nested_get_vmcs12_pages,
.get_nested_state_pages = vmx_get_nested_state_pages,
.write_log_dirty = nested_vmx_write_pml_buffer,
.enable_evmcs = nested_enable_evmcs,
.get_evmcs_version = nested_get_evmcs_version,
......
......@@ -29,7 +29,7 @@ static struct kvm_event_hw_type_mapping intel_arch_events[] = {
[4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
[5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
[6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
[7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES },
[7] = { 0x00, 0x03, PERF_COUNT_HW_REF_CPU_CYCLES },
};
/* mapping between fixed pmc index and intel_arch_events array */
......@@ -345,7 +345,9 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
x86_pmu.num_counters_gp);
eax.split.bit_width = min_t(int, eax.split.bit_width, x86_pmu.bit_width_gp);
pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
eax.split.mask_length = min_t(int, eax.split.mask_length, x86_pmu.events_mask_len);
pmu->available_event_types = ~entry->ebx &
((1ull << eax.split.mask_length) - 1);
......@@ -355,6 +357,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->nr_arch_fixed_counters =
min_t(int, edx.split.num_counters_fixed,
x86_pmu.num_counters_fixed);
edx.split.bit_width_fixed = min_t(int,
edx.split.bit_width_fixed, x86_pmu.bit_width_fixed);
pmu->counter_bitmask[KVM_PMC_FIXED] =
((u64)1 << edx.split.bit_width_fixed) - 1;
}
......
......@@ -6653,6 +6653,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (vmx->emulation_required)
return EXIT_FASTPATH_NONE;
trace_kvm_entry(vcpu);
if (vmx->ple_window_dirty) {
vmx->ple_window_dirty = false;
vmcs_write32(PLE_WINDOW, vmx->ple_window);
......
......@@ -105,6 +105,7 @@ static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
static void process_smi(struct kvm_vcpu *vcpu);
static void enter_smm(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
static void store_regs(struct kvm_vcpu *vcpu);
......@@ -4230,6 +4231,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
{
process_nmi(vcpu);
if (kvm_check_request(KVM_REQ_SMI, vcpu))
process_smi(vcpu);
/*
* In guest mode, payload delivery should be deferred,
* so that the L1 hypervisor can intercept #PF before
......@@ -8802,9 +8806,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
if (WARN_ON_ONCE(!is_guest_mode(vcpu)))
;
else if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
r = 0;
goto out;
}
......@@ -8988,8 +8990,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_x86_ops.request_immediate_exit(vcpu);
}
trace_kvm_entry(vcpu);
fpregs_assert_state_consistent();
if (test_thread_flag(TIF_NEED_FPU_LOAD))
switch_fpu_return();
......@@ -11556,6 +11556,7 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
}
EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
......
......@@ -1292,6 +1292,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
return -EINVAL;
/* We can read the guest memory with __xxx_user() later on. */
if ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
(mem->userspace_addr != untagged_addr(mem->userspace_addr)) ||
!access_ok((void __user *)(unsigned long)mem->userspace_addr,
mem->memory_size))
return -EINVAL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment