Commit fc71278f authored by Jim Mattson, committed by Juerg Haefliger

KVM: nVMX: Eliminate vmcs02 pool

BugLink: https://bugs.launchpad.net/bugs/1811080

commit de3a0021 upstream.

The potential performance advantages of a vmcs02 pool have never been
realized. To simplify the code, eliminate the pool. Instead, a single
vmcs02 is allocated per VCPU when the VCPU enters VMX operation.
Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Mark Kanda <mark.kanda@oracle.com>
Reviewed-by: Ameya More <ameya.more@oracle.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
[bwh: Backported to 4.4:
 - No loaded_vmcs::shadow_vmcs field to initialise
 - Adjust context]
Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Juerg Haefliger <juergh@canonical.com>
Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
parent 94dc5e5e
...@@ -318,7 +318,6 @@ static const struct kernel_param_ops vmentry_l1d_flush_ops = { ...@@ -318,7 +318,6 @@ static const struct kernel_param_ops vmentry_l1d_flush_ops = {
module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644); module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
#define NR_AUTOLOAD_MSRS 8 #define NR_AUTOLOAD_MSRS 8
#define VMCS02_POOL_SIZE 1
struct vmcs { struct vmcs {
u32 revision_id; u32 revision_id;
...@@ -351,7 +350,7 @@ struct shared_msr_entry { ...@@ -351,7 +350,7 @@ struct shared_msr_entry {
* stored in guest memory specified by VMPTRLD, but is opaque to the guest, * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
* which must access it using VMREAD/VMWRITE/VMCLEAR instructions. * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
* More than one of these structures may exist, if L1 runs multiple L2 guests. * More than one of these structures may exist, if L1 runs multiple L2 guests.
* nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
* underlying hardware which will be used to run L2. * underlying hardware which will be used to run L2.
* This structure is packed to ensure that its layout is identical across * This structure is packed to ensure that its layout is identical across
* machines (necessary for live migration). * machines (necessary for live migration).
...@@ -530,13 +529,6 @@ struct __packed vmcs12 { ...@@ -530,13 +529,6 @@ struct __packed vmcs12 {
*/ */
#define VMCS12_SIZE 0x1000 #define VMCS12_SIZE 0x1000
/* Used to remember the last vmcs02 used for some recently used vmcs12s */
struct vmcs02_list {
struct list_head list;
gpa_t vmptr;
struct loaded_vmcs vmcs02;
};
/* /*
* The nested_vmx structure is part of vcpu_vmx, and holds information we need * The nested_vmx structure is part of vcpu_vmx, and holds information we need
* for correct emulation of VMX (i.e., nested VMX) on this vcpu. * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
...@@ -558,16 +550,16 @@ struct nested_vmx { ...@@ -558,16 +550,16 @@ struct nested_vmx {
*/ */
bool sync_shadow_vmcs; bool sync_shadow_vmcs;
/* vmcs02_list cache of VMCSs recently used to run L2 guests */
struct list_head vmcs02_pool;
int vmcs02_num;
u64 vmcs01_tsc_offset; u64 vmcs01_tsc_offset;
bool change_vmcs01_virtual_x2apic_mode; bool change_vmcs01_virtual_x2apic_mode;
/* L2 must run next, and mustn't decide to exit to L1. */ /* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending; bool nested_run_pending;
struct loaded_vmcs vmcs02;
/* /*
* Guest pages referred to in vmcs02 with host-physical pointers, so * Guest pages referred to in the vmcs02 with host-physical
* we must keep them pinned while L2 runs. * pointers, so we must keep them pinned while L2 runs.
*/ */
struct page *apic_access_page; struct page *apic_access_page;
struct page *virtual_apic_page; struct page *virtual_apic_page;
...@@ -6642,93 +6634,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu) ...@@ -6642,93 +6634,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
return handle_nop(vcpu); return handle_nop(vcpu);
} }
/*
* To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
* We could reuse a single VMCS for all the L2 guests, but we also want the
* option to allocate a separate vmcs02 for each separate loaded vmcs12 - this
* allows keeping them loaded on the processor, and in the future will allow
* optimizations where prepare_vmcs02 doesn't need to set all the fields on
* every entry if they never change.
* So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE
* (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first.
*
* The following functions allocate and free a vmcs02 in this pool.
*/
/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */
static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
{
struct vmcs02_list *item;
list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
if (item->vmptr == vmx->nested.current_vmptr) {
list_move(&item->list, &vmx->nested.vmcs02_pool);
return &item->vmcs02;
}
if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
/* Recycle the least recently used VMCS. */
item = list_entry(vmx->nested.vmcs02_pool.prev,
struct vmcs02_list, list);
item->vmptr = vmx->nested.current_vmptr;
list_move(&item->list, &vmx->nested.vmcs02_pool);
return &item->vmcs02;
}
/* Create a new VMCS */
item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
if (!item)
return NULL;
item->vmcs02.vmcs = alloc_vmcs();
if (!item->vmcs02.vmcs) {
kfree(item);
return NULL;
}
loaded_vmcs_init(&item->vmcs02);
item->vmptr = vmx->nested.current_vmptr;
list_add(&(item->list), &(vmx->nested.vmcs02_pool));
vmx->nested.vmcs02_num++;
return &item->vmcs02;
}
/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */
static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
{
struct vmcs02_list *item;
list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
if (item->vmptr == vmptr) {
free_loaded_vmcs(&item->vmcs02);
list_del(&item->list);
kfree(item);
vmx->nested.vmcs02_num--;
return;
}
}
/*
* Free all VMCSs saved for this vcpu, except the one pointed by
* vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
* must be &vmx->vmcs01.
*/
static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
{
struct vmcs02_list *item, *n;
WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
/*
* Something will leak if the above WARN triggers. Better than
* a use-after-free.
*/
if (vmx->loaded_vmcs == &item->vmcs02)
continue;
free_loaded_vmcs(&item->vmcs02);
list_del(&item->list);
kfree(item);
vmx->nested.vmcs02_num--;
}
}
/* /*
* The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
* set the success or error code of an emulated VMX instruction, as specified * set the success or error code of an emulated VMX instruction, as specified
...@@ -7041,6 +6946,11 @@ static int handle_vmon(struct kvm_vcpu *vcpu) ...@@ -7041,6 +6946,11 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
return 1; return 1;
} }
vmx->nested.vmcs02.vmcs = alloc_vmcs();
if (!vmx->nested.vmcs02.vmcs)
goto out_vmcs02;
loaded_vmcs_init(&vmx->nested.vmcs02);
if (cpu_has_vmx_msr_bitmap()) { if (cpu_has_vmx_msr_bitmap()) {
vmx->nested.msr_bitmap = vmx->nested.msr_bitmap =
(unsigned long *)__get_free_page(GFP_KERNEL); (unsigned long *)__get_free_page(GFP_KERNEL);
...@@ -7059,9 +6969,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) ...@@ -7059,9 +6969,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
vmx->nested.current_shadow_vmcs = shadow_vmcs; vmx->nested.current_shadow_vmcs = shadow_vmcs;
} }
INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
vmx->nested.vmcs02_num = 0;
hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL); HRTIMER_MODE_REL);
vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
...@@ -7078,6 +6985,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu) ...@@ -7078,6 +6985,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
free_page((unsigned long)vmx->nested.msr_bitmap); free_page((unsigned long)vmx->nested.msr_bitmap);
out_msr_bitmap: out_msr_bitmap:
free_loaded_vmcs(&vmx->nested.vmcs02);
out_vmcs02:
return -ENOMEM; return -ENOMEM;
} }
...@@ -7154,7 +7064,7 @@ static void free_nested(struct vcpu_vmx *vmx) ...@@ -7154,7 +7064,7 @@ static void free_nested(struct vcpu_vmx *vmx)
} }
if (enable_shadow_vmcs) if (enable_shadow_vmcs)
free_vmcs(vmx->nested.current_shadow_vmcs); free_vmcs(vmx->nested.current_shadow_vmcs);
/* Unpin physical memory we referred to in current vmcs02 */ /* Unpin physical memory we referred to in the vmcs02 */
if (vmx->nested.apic_access_page) { if (vmx->nested.apic_access_page) {
nested_release_page(vmx->nested.apic_access_page); nested_release_page(vmx->nested.apic_access_page);
vmx->nested.apic_access_page = NULL; vmx->nested.apic_access_page = NULL;
...@@ -7170,7 +7080,7 @@ static void free_nested(struct vcpu_vmx *vmx) ...@@ -7170,7 +7080,7 @@ static void free_nested(struct vcpu_vmx *vmx)
vmx->nested.pi_desc = NULL; vmx->nested.pi_desc = NULL;
} }
nested_free_all_saved_vmcss(vmx); free_loaded_vmcs(&vmx->nested.vmcs02);
} }
/* Emulate the VMXOFF instruction */ /* Emulate the VMXOFF instruction */
...@@ -7204,8 +7114,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) ...@@ -7204,8 +7114,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
vmptr + offsetof(struct vmcs12, launch_state), vmptr + offsetof(struct vmcs12, launch_state),
&zero, sizeof(zero)); &zero, sizeof(zero));
nested_free_vmcs02(vmx, vmptr);
skip_emulated_instruction(vcpu); skip_emulated_instruction(vcpu);
nested_vmx_succeed(vcpu); nested_vmx_succeed(vcpu);
return 1; return 1;
...@@ -7992,10 +7900,11 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) ...@@ -7992,10 +7900,11 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
/* /*
* The host physical addresses of some pages of guest memory * The host physical addresses of some pages of guest memory
* are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
* may write to these pages via their host physical address while * Page). The CPU may write to these pages via their host
* L2 is running, bypassing any address-translation-based dirty * physical address while L2 is running, bypassing any
* tracking (e.g. EPT write protection). * address-translation-based dirty tracking (e.g. EPT write
* protection).
* *
* Mark them dirty on every exit from L2 to prevent them from * Mark them dirty on every exit from L2 to prevent them from
* getting out of sync with dirty tracking. * getting out of sync with dirty tracking.
...@@ -10209,7 +10118,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) ...@@ -10209,7 +10118,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
struct vmcs12 *vmcs12; struct vmcs12 *vmcs12;
struct vcpu_vmx *vmx = to_vmx(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu);
int cpu; int cpu;
struct loaded_vmcs *vmcs02;
bool ia32e; bool ia32e;
u32 msr_entry_idx; u32 msr_entry_idx;
...@@ -10349,10 +10257,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) ...@@ -10349,10 +10257,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
* the nested entry. * the nested entry.
*/ */
vmcs02 = nested_get_current_vmcs02(vmx);
if (!vmcs02)
return -ENOMEM;
enter_guest_mode(vcpu); enter_guest_mode(vcpu);
vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
...@@ -10361,7 +10265,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) ...@@ -10361,7 +10265,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
cpu = get_cpu(); cpu = get_cpu();
vmx->loaded_vmcs = vmcs02; vmx->loaded_vmcs = &vmx->nested.vmcs02;
vmx_vcpu_put(vcpu); vmx_vcpu_put(vcpu);
vmx_vcpu_load(vcpu, cpu); vmx_vcpu_load(vcpu, cpu);
vcpu->cpu = cpu; vcpu->cpu = cpu;
...@@ -10876,10 +10780,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, ...@@ -10876,10 +10780,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS)); vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS));
vmx_segment_cache_clear(vmx); vmx_segment_cache_clear(vmx);
/* if no vmcs02 cache requested, remove the one we used */
if (VMCS02_POOL_SIZE == 0)
nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
load_vmcs12_host_state(vcpu, vmcs12); load_vmcs12_host_state(vcpu, vmcs12);
/* Update TSC_OFFSET if TSC was changed while L2 ran */ /* Update TSC_OFFSET if TSC was changed while L2 ran */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment