Commit 443381a8, authored by Anthony Liguori, committed by Avi Kivity

KVM: VMX: add module parameter to avoid trapping HLT instructions (v5)

In certain use-cases, we want to allocate guests fixed time slices where idle
guest cycles leave the machine idling.  There are many approaches to achieve
this, but the most direct is to simply avoid trapping the HLT instruction, which
lets the guest execute the instruction directly, putting the processor to sleep.

Introduce this as a module-level option for kvm-vmx.ko since if you do this
for one guest, you probably want to do it for all.
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
parent 38e5e92f
...@@ -297,6 +297,12 @@ enum vmcs_field { ...@@ -297,6 +297,12 @@ enum vmcs_field {
#define GUEST_INTR_STATE_SMI 0x00000004 #define GUEST_INTR_STATE_SMI 0x00000004
#define GUEST_INTR_STATE_NMI 0x00000008 #define GUEST_INTR_STATE_NMI 0x00000008
/*
 * GUEST_ACTIVITY_STATE values.
 *
 * These are distinct state codes, not OR-able bit flags: the VMCS field holds
 * exactly one of them and is tested with an equality comparison.
 */
#define GUEST_ACTIVITY_ACTIVE 0
#define GUEST_ACTIVITY_HLT 1
#define GUEST_ACTIVITY_SHUTDOWN 2
#define GUEST_ACTIVITY_WAIT_SIPI 3
/* /*
* Exit Qualifications for MOV for Control Register Access * Exit Qualifications for MOV for Control Register Access
*/ */
......
...@@ -69,6 +69,9 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO); ...@@ -69,6 +69,9 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
static int __read_mostly vmm_exclusive = 1; static int __read_mostly vmm_exclusive = 1;
module_param(vmm_exclusive, bool, S_IRUGO); module_param(vmm_exclusive, bool, S_IRUGO);
/*
 * yield_on_hlt - whether a guest HLT should exit to the host.
 *
 * When nonzero (the default), setup_vmcs_config() adds CPU_BASED_HLT_EXITING
 * to the `min` set of CPU-based execution controls.  When zero, that control
 * is not requested, and vmx_clear_hlt() resets a GUEST_ACTIVITY_HLT activity
 * state back to GUEST_ACTIVITY_ACTIVE after an event has been queued for
 * injection.
 *
 * Registered with S_IRUGO permissions, like the neighbouring parameters.
 */
static int __read_mostly yield_on_hlt = 1;
module_param(yield_on_hlt, bool, S_IRUGO);
#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
#define KVM_GUEST_CR0_MASK \ #define KVM_GUEST_CR0_MASK \
...@@ -1009,6 +1012,17 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) ...@@ -1009,6 +1012,17 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
vmx_set_interrupt_shadow(vcpu, 0); vmx_set_interrupt_shadow(vcpu, 0);
} }
/*
 * vmx_clear_hlt - take the vCPU out of the HLT activity state.
 *
 * Called after an exception, interrupt or NMI has been written to
 * VM_ENTRY_INTR_INFO_FIELD.  Only does anything when HLT exiting is disabled
 * (yield_on_hlt == 0): if the VMCS guest activity state is
 * GUEST_ACTIVITY_HLT, it is rewritten to GUEST_ACTIVITY_ACTIVE.
 *
 * The HLT instruction does not need to be skipped explicitly: when the HLT
 * state is set the instruction is already executing, so RIP has already been
 * advanced past it.
 */
static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
{
	/* With HLT exiting enabled there is nothing to undo. */
	if (yield_on_hlt)
		return;

	/* Leave any activity state other than HLT untouched. */
	if (vmcs_read32(GUEST_ACTIVITY_STATE) != GUEST_ACTIVITY_HLT)
		return;

	vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
}
static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
bool has_error_code, u32 error_code, bool has_error_code, u32 error_code,
bool reinject) bool reinject)
...@@ -1035,6 +1049,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, ...@@ -1035,6 +1049,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
intr_info |= INTR_TYPE_HARD_EXCEPTION; intr_info |= INTR_TYPE_HARD_EXCEPTION;
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
vmx_clear_hlt(vcpu);
} }
static bool vmx_rdtscp_supported(void) static bool vmx_rdtscp_supported(void)
...@@ -1419,7 +1434,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) ...@@ -1419,7 +1434,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
&_pin_based_exec_control) < 0) &_pin_based_exec_control) < 0)
return -EIO; return -EIO;
min = CPU_BASED_HLT_EXITING | min =
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_LOAD_EXITING |
CPU_BASED_CR8_STORE_EXITING | CPU_BASED_CR8_STORE_EXITING |
...@@ -1432,6 +1447,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) ...@@ -1432,6 +1447,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
CPU_BASED_MWAIT_EXITING | CPU_BASED_MWAIT_EXITING |
CPU_BASED_MONITOR_EXITING | CPU_BASED_MONITOR_EXITING |
CPU_BASED_INVLPG_EXITING; CPU_BASED_INVLPG_EXITING;
if (yield_on_hlt)
min |= CPU_BASED_HLT_EXITING;
opt = CPU_BASED_TPR_SHADOW | opt = CPU_BASED_TPR_SHADOW |
CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_MSR_BITMAPS |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
...@@ -2728,7 +2747,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) ...@@ -2728,7 +2747,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_IDTR_BASE, 0); vmcs_writel(GUEST_IDTR_BASE, 0);
vmcs_write32(GUEST_IDTR_LIMIT, 0xffff); vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
vmcs_write32(GUEST_ACTIVITY_STATE, 0); vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
...@@ -2821,6 +2840,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) ...@@ -2821,6 +2840,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
} else } else
intr |= INTR_TYPE_EXT_INTR; intr |= INTR_TYPE_EXT_INTR;
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
vmx_clear_hlt(vcpu);
} }
static void vmx_inject_nmi(struct kvm_vcpu *vcpu) static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
...@@ -2848,6 +2868,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) ...@@ -2848,6 +2868,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
} }
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
vmx_clear_hlt(vcpu);
} }
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment