Commit 323d73a8 authored by Liran Alon's avatar Liran Alon Committed by Paolo Bonzini

KVM: nVMX: Change KVM_STATE_NESTED_EVMCS to signal vmcs12 is copied from eVMCS

Currently KVM_STATE_NESTED_EVMCS is used to signal that eVMCS
capability is enabled on vCPU.
As indicated by vmx->nested.enlightened_vmcs_enabled.

This is quite bizarre as userspace VMM should make sure to expose
same vCPU with same CPUID values in both source and destination.
In case vCPU is exposed with eVMCS support on CPUID, it is also
expected to enable KVM_CAP_HYPERV_ENLIGHTENED_VMCS capability.
Therefore, KVM_STATE_NESTED_EVMCS is redundant.

KVM_STATE_NESTED_EVMCS is currently used on restore path
(vmx_set_nested_state()) only to enable eVMCS capability in KVM
and to signal need_vmcs12_sync such that on next VMEntry to guest
nested_sync_from_vmcs12() will be called to sync vmcs12 content
into eVMCS in guest memory.
However, because restore nested-state is rare enough, we could
have just modified vmx_set_nested_state() to always signal
need_vmcs12_sync.

From all the above, it seems that we could have just removed
the usage of KVM_STATE_NESTED_EVMCS. However, in order to preserve
backwards migration compatibility, we cannot do that.
(vmx_get_nested_state() needs to signal flag when migrating from
new kernel to old kernel).

Returning KVM_STATE_NESTED_EVMCS when just vCPU have eVMCS enabled
have a bad side-effect of userspace VMM having to send nested-state
from source to destination as part of migration stream. Even if
guest have never used eVMCS as it doesn't even run a nested
hypervisor workload. This requires destination userspace VMM and
KVM to support setting nested-state. Which make it more difficult
to migrate from new host to older host.
To avoid this, change KVM_STATE_NESTED_EVMCS to signal eVMCS is
not only enabled but also active. i.e. Guest have made some
eVMCS active via an enlightened VMEntry. i.e. vmcs12 is copied
from eVMCS and therefore should be restored into eVMCS resident
in memory (by copy_vmcs12_to_enlightened()).
Reviewed-by: default avatarVitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: default avatarMaran Wilson <maran.wilson@oracle.com>
Reviewed-by: default avatarKrish Sadhukhan <krish.sadhukhan@oracle.com>
Signed-off-by: default avatarLiran Alon <liran.alon@oracle.com>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent 65b712f1
...@@ -5240,9 +5240,6 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, ...@@ -5240,9 +5240,6 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
vmx = to_vmx(vcpu); vmx = to_vmx(vcpu);
vmcs12 = get_vmcs12(vcpu); vmcs12 = get_vmcs12(vcpu);
if (nested_vmx_allowed(vcpu) && vmx->nested.enlightened_vmcs_enabled)
kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
if (nested_vmx_allowed(vcpu) && if (nested_vmx_allowed(vcpu) &&
(vmx->nested.vmxon || vmx->nested.smm.vmxon)) { (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr; kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
...@@ -5251,6 +5248,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, ...@@ -5251,6 +5248,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
if (vmx_has_valid_vmcs12(vcpu)) { if (vmx_has_valid_vmcs12(vcpu)) {
kvm_state.size += sizeof(user_vmx_nested_state->vmcs12); kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
if (vmx->nested.hv_evmcs)
kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
if (is_guest_mode(vcpu) && if (is_guest_mode(vcpu) &&
nested_cpu_has_shadow_vmcs(vmcs12) && nested_cpu_has_shadow_vmcs(vmcs12) &&
vmcs12->vmcs_link_pointer != -1ull) vmcs12->vmcs_link_pointer != -1ull)
...@@ -5350,6 +5350,15 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, ...@@ -5350,6 +5350,15 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) if (kvm_state->hdr.vmx.vmcs12_pa != -1ull)
return -EINVAL; return -EINVAL;
/*
* KVM_STATE_NESTED_EVMCS used to signal that KVM should
* enable eVMCS capability on vCPU. However, since then
* code was changed such that flag signals vmcs12 should
* be copied into eVMCS in guest memory.
*
* To preserve backwards compatability, allow user
* to set this flag even when there is no VMXON region.
*/
if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS) if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
return -EINVAL; return -EINVAL;
} else { } else {
...@@ -5358,7 +5367,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, ...@@ -5358,7 +5367,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa)) if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
return -EINVAL; return -EINVAL;
} }
if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
...@@ -5383,13 +5392,11 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, ...@@ -5383,13 +5392,11 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
!(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)) !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
return -EINVAL; return -EINVAL;
vmx_leave_nested(vcpu); if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) { (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
if (!nested_vmx_allowed(vcpu))
return -EINVAL; return -EINVAL;
nested_enable_evmcs(vcpu, NULL); vmx_leave_nested(vcpu);
}
if (kvm_state->hdr.vmx.vmxon_pa == -1ull) if (kvm_state->hdr.vmx.vmxon_pa == -1ull)
return 0; return 0;
......
...@@ -146,6 +146,7 @@ int main(int argc, char *argv[]) ...@@ -146,6 +146,7 @@ int main(int argc, char *argv[])
kvm_vm_restart(vm, O_RDWR); kvm_vm_restart(vm, O_RDWR);
vm_vcpu_add(vm, VCPU_ID, 0, 0); vm_vcpu_add(vm, VCPU_ID, 0, 0);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap);
vcpu_load_state(vm, VCPU_ID, state); vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID); run = vcpu_state(vm, VCPU_ID);
free(state); free(state);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment