Commit 3e662e97 authored by Paolo Bonzini's avatar Paolo Bonzini Committed by Juerg Haefliger

KVM: VMX: make MSR bitmaps per-VCPU

BugLink: https://bugs.launchpad.net/bugs/1811080

commit 904e14fb upstream.

Place the MSR bitmap in struct loaded_vmcs, and update it in place
every time the x2apic or APICv state can change.  This is rare and
the loop can handle 64 MSRs per iteration, in a similar fashion as
nested_vmx_prepare_msr_bitmap.

This prepares for choosing, on a per-VM basis, whether to intercept
the SPEC_CTRL and PRED_CMD MSRs.
Suggested-by: default avatarJim Mattson <jmattson@google.com>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
Signed-off-by: default avatarDavid Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
[bwh: Backported to 4.4:
 - APICv support looked different
 - We still need to intercept the APIC_ID MSR
 - Adjust context]
Signed-off-by: default avatarBen Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
[juergh: Backported to Xenial 4.4:
 - Adjusted for APIC mode check differences.
 - Replaced an additional occurence of vmx_set_msr_bitmap.]
Signed-off-by: default avatarJuerg Haefliger <juergh@canonical.com>
Signed-off-by: default avatarStefan Bader <stefan.bader@canonical.com>
parent 861d73a0
......@@ -111,6 +111,14 @@ static u64 __read_mostly host_xss;
static bool __read_mostly enable_pml = 1;
module_param_named(pml, enable_pml, bool, S_IRUGO);
#define MSR_TYPE_R 1
#define MSR_TYPE_W 2
#define MSR_TYPE_RW 3
#define MSR_BITMAP_MODE_X2APIC 1
#define MSR_BITMAP_MODE_X2APIC_APICV 2
#define MSR_BITMAP_MODE_LM 4
#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
......@@ -334,6 +342,7 @@ struct loaded_vmcs {
struct vmcs *vmcs;
int cpu;
int launched;
unsigned long *msr_bitmap;
struct list_head loaded_vmcss_on_cpu_link;
};
......@@ -569,8 +578,6 @@ struct nested_vmx {
u16 posted_intr_nv;
u64 msr_ia32_feature_control;
unsigned long *msr_bitmap;
struct hrtimer preemption_timer;
bool preemption_timer_expired;
......@@ -676,6 +683,7 @@ struct vcpu_vmx {
unsigned long host_rsp;
u8 fail;
bool nmi_known_unmasked;
u8 msr_bitmap_mode;
u32 exit_intr_info;
u32 idt_vectoring_info;
ulong rflags;
......@@ -1033,6 +1041,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
static int alloc_identity_pagetable(struct kvm *kvm);
static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
......@@ -1052,10 +1061,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
static unsigned long *vmx_io_bitmap_a;
static unsigned long *vmx_io_bitmap_b;
static unsigned long *vmx_msr_bitmap_legacy;
static unsigned long *vmx_msr_bitmap_longmode;
static unsigned long *vmx_msr_bitmap_legacy_x2apic;
static unsigned long *vmx_msr_bitmap_longmode_x2apic;
static unsigned long *vmx_vmread_bitmap;
static unsigned long *vmx_vmwrite_bitmap;
......@@ -2518,29 +2523,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
vmx->guest_msrs[from] = tmp;
}
static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
{
unsigned long *msr_bitmap;
if (is_guest_mode(vcpu))
msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
else if (cpu_has_secondary_exec_ctrls() &&
(vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
if (is_long_mode(vcpu))
msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
else
msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
} else {
if (is_long_mode(vcpu))
msr_bitmap = vmx_msr_bitmap_longmode;
else
msr_bitmap = vmx_msr_bitmap_legacy;
}
vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
}
/*
* Set up the vmcs to automatically save and restore system
* msrs. Don't touch the 64-bit msrs if the guest is in legacy
......@@ -2581,7 +2563,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
vmx->save_nmsrs = save_nmsrs;
if (cpu_has_vmx_msr_bitmap())
vmx_set_msr_bitmap(&vmx->vcpu);
vmx_update_msr_bitmap(&vmx->vcpu);
}
/*
......@@ -3560,6 +3542,8 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
loaded_vmcs_clear(loaded_vmcs);
free_vmcs(loaded_vmcs->vmcs);
loaded_vmcs->vmcs = NULL;
if (loaded_vmcs->msr_bitmap)
free_page((unsigned long)loaded_vmcs->msr_bitmap);
}
static struct vmcs *alloc_vmcs(void)
......@@ -3574,7 +3558,18 @@ static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
return -ENOMEM;
loaded_vmcs_init(loaded_vmcs);
if (cpu_has_vmx_msr_bitmap()) {
loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!loaded_vmcs->msr_bitmap)
goto out_vmcs;
memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
}
return 0;
out_vmcs:
free_loaded_vmcs(loaded_vmcs);
return -ENOMEM;
}
static void free_kvm_area(void)
......@@ -4573,9 +4568,7 @@ static void free_vpid(int vpid)
spin_unlock(&vmx_vpid_lock);
}
#define MSR_TYPE_R 1
#define MSR_TYPE_W 2
static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
u32 msr, int type)
{
int f = sizeof(unsigned long);
......@@ -4610,7 +4603,7 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
}
}
static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
u32 msr, int type)
{
int f = sizeof(unsigned long);
......@@ -4691,37 +4684,78 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
}
}
static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
u32 msr, int type, bool value)
{
if (!longmode_only)
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
msr, MSR_TYPE_R | MSR_TYPE_W);
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
msr, MSR_TYPE_R | MSR_TYPE_W);
if (value)
vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
else
vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
}
static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
{
__vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
msr, MSR_TYPE_R);
__vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
msr, MSR_TYPE_R);
u8 mode = 0;
if (cpu_has_secondary_exec_ctrls() &&
(vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
mode |= MSR_BITMAP_MODE_X2APIC;
if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
mode |= MSR_BITMAP_MODE_X2APIC_APICV;
}
if (is_long_mode(vcpu))
mode |= MSR_BITMAP_MODE_LM;
return mode;
}
static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
u8 mode)
{
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
msr, MSR_TYPE_R);
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
msr, MSR_TYPE_R);
int msr;
for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
unsigned word = msr / BITS_PER_LONG;
msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
}
if (mode & MSR_BITMAP_MODE_X2APIC) {
/*
* TPR reads and writes can be virtualized even if virtual interrupt
* delivery is not in use.
*/
vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_ID), MSR_TYPE_R);
vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
}
}
}
static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
{
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
msr, MSR_TYPE_W);
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
msr, MSR_TYPE_W);
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
u8 mode = vmx_msr_bitmap_mode(vcpu);
u8 changed = mode ^ vmx->msr_bitmap_mode;
if (!changed)
return;
vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW,
!(mode & MSR_BITMAP_MODE_LM));
if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
vmx->msr_bitmap_mode = mode;
}
static bool vmx_get_enable_apicv(void)
......@@ -4965,7 +4999,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
}
if (cpu_has_vmx_msr_bitmap())
vmx_set_msr_bitmap(vcpu);
vmx_update_msr_bitmap(vcpu);
}
static u32 vmx_exec_control(struct vcpu_vmx *vmx)
......@@ -5057,7 +5091,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
}
if (cpu_has_vmx_msr_bitmap())
vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
......@@ -6401,7 +6435,7 @@ static void wakeup_handler(void)
static __init int hardware_setup(void)
{
int r = -ENOMEM, i, msr;
int r = -ENOMEM, i;
rdmsrl_safe(MSR_EFER, &host_efer);
......@@ -6416,31 +6450,13 @@ static __init int hardware_setup(void)
if (!vmx_io_bitmap_b)
goto out;
vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_msr_bitmap_legacy)
goto out1;
vmx_msr_bitmap_legacy_x2apic =
(unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_msr_bitmap_legacy_x2apic)
goto out2;
vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_msr_bitmap_longmode)
goto out3;
vmx_msr_bitmap_longmode_x2apic =
(unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_msr_bitmap_longmode_x2apic)
goto out4;
vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_vmread_bitmap)
goto out6;
goto out1;
vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_vmwrite_bitmap)
goto out7;
goto out2;
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
......@@ -6449,12 +6465,9 @@ static __init int hardware_setup(void)
memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
if (setup_vmcs_config(&vmcs_config) < 0) {
r = -EIO;
goto out8;
goto out3;
}
if (boot_cpu_has(X86_FEATURE_NX))
......@@ -6511,37 +6524,8 @@ static __init int hardware_setup(void)
kvm_tsc_scaling_ratio_frac_bits = 48;
}
vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
vmx_disable_intercept_for_msr(MSR_IA32_SPEC_CTRL, false);
vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
memcpy(vmx_msr_bitmap_legacy_x2apic,
vmx_msr_bitmap_legacy, PAGE_SIZE);
memcpy(vmx_msr_bitmap_longmode_x2apic,
vmx_msr_bitmap_longmode, PAGE_SIZE);
set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
for (msr = 0x800; msr <= 0x8ff; msr++)
vmx_disable_intercept_msr_read_x2apic(msr);
/* According SDM, in x2apic mode, the whole id reg is used. But in
* KVM, it only use the highest eight bits. Need to intercept it */
vmx_enable_intercept_msr_read_x2apic(0x802);
/* TMCCT */
vmx_enable_intercept_msr_read_x2apic(0x839);
/* TPR */
vmx_disable_intercept_msr_write_x2apic(0x808);
/* EOI */
vmx_disable_intercept_msr_write_x2apic(0x80b);
/* SELF-IPI */
vmx_disable_intercept_msr_write_x2apic(0x83f);
if (enable_ept) {
kvm_mmu_set_mask_ptes(0ull,
(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
......@@ -6572,18 +6556,10 @@ static __init int hardware_setup(void)
return alloc_kvm_area();
out8:
free_page((unsigned long)vmx_vmwrite_bitmap);
out7:
free_page((unsigned long)vmx_vmread_bitmap);
out6:
free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
out4:
free_page((unsigned long)vmx_msr_bitmap_longmode);
out3:
free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
free_page((unsigned long)vmx_vmwrite_bitmap);
out2:
free_page((unsigned long)vmx_msr_bitmap_legacy);
free_page((unsigned long)vmx_vmread_bitmap);
out1:
free_page((unsigned long)vmx_io_bitmap_b);
out:
......@@ -6594,10 +6570,6 @@ static __init int hardware_setup(void)
static __exit void hardware_unsetup(void)
{
free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
free_page((unsigned long)vmx_msr_bitmap_legacy);
free_page((unsigned long)vmx_msr_bitmap_longmode);
free_page((unsigned long)vmx_io_bitmap_b);
free_page((unsigned long)vmx_io_bitmap_a);
free_page((unsigned long)vmx_vmwrite_bitmap);
......@@ -6961,13 +6933,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
if (r < 0)
goto out_vmcs02;
if (cpu_has_vmx_msr_bitmap()) {
vmx->nested.msr_bitmap =
(unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx->nested.msr_bitmap)
goto out_msr_bitmap;
}
if (enable_shadow_vmcs) {
shadow_vmcs = alloc_vmcs();
if (!shadow_vmcs)
......@@ -6992,9 +6957,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
return 1;
out_shadow_vmcs:
free_page((unsigned long)vmx->nested.msr_bitmap);
out_msr_bitmap:
free_loaded_vmcs(&vmx->nested.vmcs02);
out_vmcs02:
......@@ -7068,10 +7030,6 @@ static void free_nested(struct vcpu_vmx *vmx)
vmx->nested.vmxon = false;
free_vpid(vmx->nested.vpid02);
nested_release_vmcs12(vmx);
if (vmx->nested.msr_bitmap) {
free_page((unsigned long)vmx->nested.msr_bitmap);
vmx->nested.msr_bitmap = NULL;
}
if (enable_shadow_vmcs)
free_vmcs(vmx->nested.current_shadow_vmcs);
/* Unpin physical memory we referred to in the vmcs02 */
......@@ -8481,7 +8439,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
}
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
vmx_set_msr_bitmap(vcpu);
vmx_update_msr_bitmap(vcpu);
}
static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
......@@ -9068,6 +9026,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
{
int err;
struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
unsigned long *msr_bitmap;
int cpu;
if (!vmx)
......@@ -9108,6 +9067,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
if (err < 0)
goto free_msrs;
msr_bitmap = vmx->vmcs01.msr_bitmap;
vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
vmx->msr_bitmap_mode = 0;
vmx->loaded_vmcs = &vmx->vmcs01;
cpu = get_cpu();
vmx_vcpu_load(&vmx->vcpu, cpu);
......@@ -9524,7 +9492,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
int msr;
struct page *page;
unsigned long *msr_bitmap_l1;
unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
/* This shortcut is ok because we support only x2APIC MSRs so far. */
if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
......@@ -10035,6 +10003,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
else
vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
if (cpu_has_vmx_msr_bitmap())
vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
if (enable_vpid) {
/*
* There is no direct mapping between vpid02 and vpid12, the
......@@ -10738,7 +10709,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
if (cpu_has_vmx_msr_bitmap())
vmx_set_msr_bitmap(vcpu);
vmx_update_msr_bitmap(vcpu);
if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
vmcs12->vm_exit_msr_load_count))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment