Commit 4995a368 authored by Cathy Avery, committed by Paolo Bonzini

KVM: SVM: Use a separate vmcb for the nested L2 guest

svm->vmcb will now point to a separate vmcb for L1 (not nested) or L2
(nested).
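As an illustration (not part of the diff below; the actual call sites live in the nested
entry/exit paths in nested.c, which are not shown here), svm->vmcb is moved between the
two VMCBs with the new svm_switch_vmcb helper roughly like this:

	/* Nested VMRUN: start running L2 on VMCB02. */
	svm_switch_vmcb(svm, &svm->nested.vmcb02);

	/* Nested #VMEXIT: resume L1 on VMCB01. */
	svm_switch_vmcb(svm, &svm->vmcb01);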

The main advantages are removing get_host_vmcb and hsave, in favor of
concepts that are shared with VMX.

We no longer need to stash the L1 registers in hsave while L2 runs, but we do need
to copy the VMLOAD/VMSAVE registers from VMCB01 to VMCB02 and back.  This has more
or less the same cost, but code-wise nested_svm_vmloadsave can be reused.
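For reference, the state that VMLOAD/VMSAVE manage, and that therefore has to be copied
between VMCB01 and VMCB02, is roughly the following; this is only a sketch of what
nested_svm_vmloadsave does, the exact body in nested.c may differ:

	void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
	{
		/* Segment state handled by VMLOAD/VMSAVE. */
		to_vmcb->save.fs = from_vmcb->save.fs;
		to_vmcb->save.gs = from_vmcb->save.gs;
		to_vmcb->save.tr = from_vmcb->save.tr;
		to_vmcb->save.ldtr = from_vmcb->save.ldtr;

		/* MSRs handled by VMLOAD/VMSAVE. */
		to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
		to_vmcb->save.star = from_vmcb->save.star;
		to_vmcb->save.lstar = from_vmcb->save.lstar;
		to_vmcb->save.cstar = from_vmcb->save.cstar;
		to_vmcb->save.sfmask = from_vmcb->save.sfmask;
		to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
		to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
		to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
	}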

This patch omits several optimizations that are possible:

- for simplicity there is some wholesale copying of vmcb.control areas
which can go away.

- we should be able to better use the VMCB01 and VMCB02 clean bits.

- another possibility is to always use VMCB01 for VMLOAD and VMSAVE,
thus avoiding the copy of VMLOAD/VMSAVE registers from VMCB01 to
VMCB02 and back.

Tested:
kvm-unit-tests
kvm self tests
Loaded fedora nested guest on fedora
Signed-off-by: Cathy Avery <cavery@redhat.com>
Message-Id: <20201011184818.3609-3-cavery@redhat.com>
[Fix conflicts; keep VMCB02 G_PAT up to date whenever guest writes the
 PAT MSR; do not copy CR4 over from VMCB01 as it is not needed anymore; add
 a few more comments. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent a3322d5c
@@ -1084,8 +1084,8 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 	if (is_guest_mode(vcpu)) {
 		/* Write L1's TSC offset. */
 		g_tsc_offset = svm->vmcb->control.tsc_offset -
-			       svm->nested.hsave->control.tsc_offset;
-		svm->nested.hsave->control.tsc_offset = offset;
+			       svm->vmcb01.ptr->control.tsc_offset;
+		svm->vmcb01.ptr->control.tsc_offset = offset;
 	}
 
 	trace_kvm_write_tsc_offset(vcpu->vcpu_id,
@@ -1305,10 +1305,31 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 		avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
 }
 
+void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb)
+{
+	svm->current_vmcb = target_vmcb;
+	svm->vmcb = target_vmcb->ptr;
+	svm->vmcb_pa = target_vmcb->pa;
+
+	/*
+	 * Workaround: we don't yet track the ASID generation
+	 * that was active the last time target_vmcb was run.
+	 */
+	svm->asid_generation = 0;
+
+	/*
+	 * Workaround: we don't yet track the physical CPU that
+	 * target_vmcb has run on.
+	 */
+	vmcb_mark_all_dirty(svm->vmcb);
+}
+
 static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm;
-	struct page *vmcb_page;
+	struct page *vmcb01_page;
 	struct page *vmsa_page = NULL;
 	int err;
@@ -1316,8 +1337,8 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 	svm = to_svm(vcpu);
 
 	err = -ENOMEM;
-	vmcb_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
-	if (!vmcb_page)
+	vmcb01_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+	if (!vmcb01_page)
 		goto out;
 
 	if (sev_es_guest(svm->vcpu.kvm)) {
@@ -1356,14 +1377,16 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 
 	svm_vcpu_init_msrpm(vcpu, svm->msrpm);
 
-	svm->vmcb = page_address(vmcb_page);
-	svm->vmcb_pa = __sme_set(page_to_pfn(vmcb_page) << PAGE_SHIFT);
+	svm->vmcb01.ptr = page_address(vmcb01_page);
+	svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT);
 
 	if (vmsa_page)
 		svm->vmsa = page_address(vmsa_page);
 
 	svm->asid_generation = 0;
 	svm->guest_state_loaded = false;
+
+	svm_switch_vmcb(svm, &svm->vmcb01);
 	init_vmcb(svm);
 
 	svm_init_osvw(vcpu);
@@ -1379,7 +1402,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 	if (vmsa_page)
 		__free_page(vmsa_page);
 error_free_vmcb_page:
-	__free_page(vmcb_page);
+	__free_page(vmcb01_page);
 out:
 	return err;
 }
@@ -1407,7 +1430,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 
 	sev_free_vcpu(vcpu);
 
-	__free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
+	__free_page(pfn_to_page(__sme_clr(svm->vmcb01.pa) >> PAGE_SHIFT));
 	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
 }
@@ -1564,7 +1587,7 @@ static void svm_clear_vintr(struct vcpu_svm *svm)
 	/* Drop int_ctl fields related to VINTR injection. */
 	svm->vmcb->control.int_ctl &= mask;
 	if (is_guest_mode(&svm->vcpu)) {
-		svm->nested.hsave->control.int_ctl &= mask;
+		svm->vmcb01.ptr->control.int_ctl &= mask;
 
 		WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) !=
 			(svm->nested.ctl.int_ctl & V_TPR_MASK));
@@ -2861,7 +2884,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
 			return 1;
 		vcpu->arch.pat = data;
-		svm->vmcb->save.g_pat = data;
+		svm->vmcb01.ptr->save.g_pat = data;
+		if (is_guest_mode(vcpu))
+			nested_vmcb02_compute_g_pat(svm);
 		vmcb_mark_dirty(svm->vmcb, VMCB_NPT);
 		break;
 	case MSR_IA32_SPEC_CTRL:
@@ -3536,7 +3561,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
 	} else if (is_guest_mode(vcpu)) {
 		/* As long as interrupts are being delivered... */
 		if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
-		    ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF)
+		    ? !(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF)
 		    : !(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
 			return true;
...
@@ -81,8 +81,13 @@ struct kvm_svm {
 
 struct kvm_vcpu;
 
+struct kvm_vmcb_info {
+	struct vmcb *ptr;
+	unsigned long pa;
+};
+
 struct svm_nested_state {
-	struct vmcb *hsave;
+	struct kvm_vmcb_info vmcb02;
 	u64 hsave_msr;
 	u64 vm_cr_msr;
 	u64 vmcb12_gpa;
@@ -104,6 +109,8 @@ struct vcpu_svm {
 	struct kvm_vcpu vcpu;
 	struct vmcb *vmcb;
 	unsigned long vmcb_pa;
+	struct kvm_vmcb_info vmcb01;
+	struct kvm_vmcb_info *current_vmcb;
 	struct svm_cpu_data *svm_data;
 	u32 asid;
 	uint64_t asid_generation;
@@ -244,14 +251,6 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
 	return container_of(vcpu, struct vcpu_svm, vcpu);
 }
 
-static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
-{
-	if (is_guest_mode(&svm->vcpu))
-		return svm->nested.hsave;
-	else
-		return svm->vmcb;
-}
-
 static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
 {
 	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
@@ -272,7 +271,7 @@ static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
 
 static inline void set_dr_intercepts(struct vcpu_svm *svm)
 {
-	struct vmcb *vmcb = get_host_vmcb(svm);
+	struct vmcb *vmcb = svm->vmcb01.ptr;
 
 	if (!sev_es_guest(svm->vcpu.kvm)) {
 		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
@@ -299,7 +298,7 @@ static inline void set_dr_intercepts(struct vcpu_svm *svm)
 
 static inline void clr_dr_intercepts(struct vcpu_svm *svm)
 {
-	struct vmcb *vmcb = get_host_vmcb(svm);
+	struct vmcb *vmcb = svm->vmcb01.ptr;
 
 	vmcb->control.intercepts[INTERCEPT_DR] = 0;
@@ -314,7 +313,7 @@ static inline void clr_dr_intercepts(struct vcpu_svm *svm)
 
 static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
 {
-	struct vmcb *vmcb = get_host_vmcb(svm);
+	struct vmcb *vmcb = svm->vmcb01.ptr;
 
 	WARN_ON_ONCE(bit >= 32);
 	vmcb_set_intercept(&vmcb->control, INTERCEPT_EXCEPTION_OFFSET + bit);
@@ -324,7 +323,7 @@ static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
 
 static inline void clr_exception_intercept(struct vcpu_svm *svm, u32 bit)
 {
-	struct vmcb *vmcb = get_host_vmcb(svm);
+	struct vmcb *vmcb = svm->vmcb01.ptr;
 
 	WARN_ON_ONCE(bit >= 32);
 	vmcb_clr_intercept(&vmcb->control, INTERCEPT_EXCEPTION_OFFSET + bit);
@@ -334,7 +333,7 @@ static inline void clr_exception_intercept(struct vcpu_svm *svm, u32 bit)
 
 static inline void svm_set_intercept(struct vcpu_svm *svm, int bit)
 {
-	struct vmcb *vmcb = get_host_vmcb(svm);
+	struct vmcb *vmcb = svm->vmcb01.ptr;
 
 	vmcb_set_intercept(&vmcb->control, bit);
@@ -343,7 +342,7 @@ static inline void svm_set_intercept(struct vcpu_svm *svm, int bit)
 
 static inline void svm_clr_intercept(struct vcpu_svm *svm, int bit)
 {
-	struct vmcb *vmcb = get_host_vmcb(svm);
+	struct vmcb *vmcb = svm->vmcb01.ptr;
 
 	vmcb_clr_intercept(&vmcb->control, bit);
@@ -451,6 +450,8 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
 			       bool has_error_code, u32 error_code);
 int nested_svm_exit_special(struct vcpu_svm *svm);
 void sync_nested_vmcb_control(struct vcpu_svm *svm);
+void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm);
+void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
 
 extern struct kvm_x86_nested_ops svm_nested_ops;
...