Commit 57880666 authored by Paolo Bonzini, committed by Stefan Bader

x86/KVM/VMX: Add L1D flush logic

Add the logic for flushing L1D on VMENTER. The flush depends on the static
key being enabled and the new l1tf_flush_l1d flag being set.

The flag is set:
 - Always, if the flush module parameter is 'always'

 - Conditionally at:
   - Entry to vcpu_run(), i.e. after executing user space

   - From the sched_in notifier, i.e. when switching to a vCPU thread.

   - From vmexit handlers which are considered unsafe, i.e. where
     sensitive data can be brought into L1D:

     - The emulator, which could be a good target for other speculative
       execution-based threats,

     - The MMU, which can bring host page tables into the L1 cache.

     - External interrupts

     - Nested operations that require the MMU (see above). That is
       vmptrld, vmptrst, vmclear, vmwrite, vmread.

     - When handling invept, invvpid

[ tglx: Split out from combo patch and reduced to a single flag ]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

CVE-2018-3620
CVE-2018-3646

[smb: Moved change to kvm/mmu.c(kvm_handle_page_fault) into kvm/vmx.c
      before calling kvm_mmu_page_fault(). Left kvm/svm.c unmodified
      as AMD is not said to be affected.]
Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
parent 37329e72
...@@ -614,6 +614,9 @@ struct kvm_vcpu_arch { ...@@ -614,6 +614,9 @@ struct kvm_vcpu_arch {
int pending_ioapic_eoi; int pending_ioapic_eoi;
int pending_external_vector; int pending_external_vector;
/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
bool l1tf_flush_l1d;
}; };
struct kvm_lpage_info { struct kvm_lpage_info {
...@@ -754,6 +757,7 @@ struct kvm_vcpu_stat { ...@@ -754,6 +757,7 @@ struct kvm_vcpu_stat {
u32 signal_exits; u32 signal_exits;
u32 irq_window_exits; u32 irq_window_exits;
u32 nmi_window_exits; u32 nmi_window_exits;
u32 l1d_flush;
u32 halt_exits; u32 halt_exits;
u32 halt_successful_poll; u32 halt_successful_poll;
u32 halt_attempted_poll; u32 halt_attempted_poll;
......
...@@ -5401,6 +5401,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) ...@@ -5401,6 +5401,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
/* EPT won't cause page fault directly */ /* EPT won't cause page fault directly */
BUG_ON(enable_ept); BUG_ON(enable_ept);
cr2 = vmcs_readl(EXIT_QUALIFICATION); cr2 = vmcs_readl(EXIT_QUALIFICATION);
vcpu->arch.l1tf_flush_l1d = true;
trace_kvm_page_fault(cr2, error_code); trace_kvm_page_fault(cr2, error_code);
if (kvm_event_needs_reinjection(vcpu)) if (kvm_event_needs_reinjection(vcpu))
...@@ -8279,9 +8280,20 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) ...@@ -8279,9 +8280,20 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
#define L1D_CACHE_ORDER 4 #define L1D_CACHE_ORDER 4
static void *vmx_l1d_flush_pages; static void *vmx_l1d_flush_pages;
static void __maybe_unused vmx_l1d_flush(void) static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
{ {
int size = PAGE_SIZE << L1D_CACHE_ORDER; int size = PAGE_SIZE << L1D_CACHE_ORDER;
bool always;
/*
* If the mitigation mode is 'flush always', keep the flush bit
* set, otherwise clear it. It gets set again either from
* vcpu_run() or from one of the unsafe VMEXIT handlers.
*/
always = vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS;
vcpu->arch.l1tf_flush_l1d = always;
vcpu->stat.l1d_flush++;
if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) { if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH); wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
...@@ -8523,6 +8535,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) ...@@ -8523,6 +8535,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
[ss]"i"(__KERNEL_DS), [ss]"i"(__KERNEL_DS),
[cs]"i"(__KERNEL_CS) [cs]"i"(__KERNEL_CS)
); );
vcpu->arch.l1tf_flush_l1d = true;
} else } else
local_irq_enable(); local_irq_enable();
} }
...@@ -8736,6 +8749,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) ...@@ -8736,6 +8749,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
x86_spec_ctrl_set_guest(vcpu->arch.spec_ctrl, 0); x86_spec_ctrl_set_guest(vcpu->arch.spec_ctrl, 0);
if (static_branch_unlikely(&vmx_l1d_should_flush)) {
if (vcpu->arch.l1tf_flush_l1d)
vmx_l1d_flush(vcpu);
}
vmx->__launched = vmx->loaded_vmcs->launched; vmx->__launched = vmx->loaded_vmcs->launched;
asm( asm(
/* Store host registers */ /* Store host registers */
...@@ -10188,6 +10206,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) ...@@ -10188,6 +10206,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
vmcs12->launch_state = 1; vmcs12->launch_state = 1;
/* Hide L1D cache contents from the nested guest. */
vmx->vcpu.arch.l1tf_flush_l1d = true;
if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
return kvm_vcpu_halt(vcpu); return kvm_vcpu_halt(vcpu);
......
...@@ -169,6 +169,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { ...@@ -169,6 +169,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
{ "irq_injections", VCPU_STAT(irq_injections) }, { "irq_injections", VCPU_STAT(irq_injections) },
{ "nmi_injections", VCPU_STAT(nmi_injections) }, { "nmi_injections", VCPU_STAT(nmi_injections) },
{ "l1d_flush", VCPU_STAT(l1d_flush) },
{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
{ "mmu_pte_write", VM_STAT(mmu_pte_write) }, { "mmu_pte_write", VM_STAT(mmu_pte_write) },
{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
...@@ -4351,6 +4352,9 @@ static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *v ...@@ -4351,6 +4352,9 @@ static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *v
int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
unsigned int bytes, struct x86_exception *exception) unsigned int bytes, struct x86_exception *exception)
{ {
/* kvm_write_guest_virt_system can pull in tons of pages. */
vcpu->arch.l1tf_flush_l1d = true;
return kvm_write_guest_virt_helper(addr, val, bytes, vcpu, return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
PFERR_WRITE_MASK, exception); PFERR_WRITE_MASK, exception);
} }
...@@ -5449,6 +5453,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, ...@@ -5449,6 +5453,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
bool writeback = true; bool writeback = true;
bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable; bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
vcpu->arch.l1tf_flush_l1d = true;
/* /*
* Clear write_fault_to_shadow_pgtable here to ensure it is * Clear write_fault_to_shadow_pgtable here to ensure it is
* never reused. * never reused.
...@@ -6785,6 +6791,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu) ...@@ -6785,6 +6791,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
struct kvm *kvm = vcpu->kvm; struct kvm *kvm = vcpu->kvm;
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
vcpu->arch.l1tf_flush_l1d = true;
for (;;) { for (;;) {
if (kvm_vcpu_running(vcpu)) { if (kvm_vcpu_running(vcpu)) {
...@@ -7755,6 +7762,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) ...@@ -7755,6 +7762,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
{ {
vcpu->arch.l1tf_flush_l1d = true;
kvm_x86_ops->sched_in(vcpu, cpu); kvm_x86_ops->sched_in(vcpu, cpu);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment