Commit 57880666 authored by Paolo Bonzini, committed by Stefan Bader

x86/KVM/VMX: Add L1D flush logic

Add the logic for flushing L1D on VMENTER. The flush depends on the static
key being enabled and the new l1tf_flush_l1d flag being set; a short
illustrative sketch of this flag handling follows the list below.

The flag is set:
 - Always, if the flush module parameter is 'always'

 - Conditionally at:
   - Entry to vcpu_run(), i.e. after executing user space

   - From the sched_in notifier, i.e. when switching to a vCPU thread.

   - From vmexit handlers which are considered unsafe, i.e. where
     sensitive data can be brought into L1D:

     - The emulator, which could be a good target for other speculative
       execution-based threats,

     - The MMU, which can bring host page tables into the L1D cache.

     - External interrupts

     - Nested operations that require the MMU (see above). That is
       vmptrld, vmptrst, vmclear, vmwrite, vmread.

     - When handling invept, invvpid
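
As an aside, here is a small stand-alone C model of the flag handling
described above. It is only an illustrative sketch: vcpu_model,
model_l1d_flush() and model_vmenter() are made-up stand-ins for the real
KVM structures and for vmx_l1d_flush() / vmx_vcpu_run(), and the
VMENTER_L1D_FLUSH_* values are assumed to mirror the 'never'/'cond'/'always'
module-parameter modes.

#include <stdbool.h>
#include <stdio.h>

enum vmenter_l1d_flush_mode {
	VMENTER_L1D_FLUSH_NEVER,
	VMENTER_L1D_FLUSH_COND,
	VMENTER_L1D_FLUSH_ALWAYS,
};

/* Simplified stand-in for the fields this patch adds to the vCPU. */
struct vcpu_model {
	bool l1tf_flush_l1d;		/* arch.l1tf_flush_l1d */
	unsigned long l1d_flush;	/* stat.l1d_flush */
};

static enum vmenter_l1d_flush_mode vmentry_l1d_flush = VMENTER_L1D_FLUSH_COND;

/* Mirrors vmx_l1d_flush(): count the flush, re-arm only in 'always' mode. */
static void model_l1d_flush(struct vcpu_model *vcpu)
{
	vcpu->l1tf_flush_l1d = (vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS);
	vcpu->l1d_flush++;
	/* the real code now flushes L1D via an MSR write or a software loop */
}

/* Mirrors the vmx_vcpu_run() hunk: flush only if enabled and armed. */
static void model_vmenter(struct vcpu_model *vcpu, bool flush_enabled)
{
	if (flush_enabled && vcpu->l1tf_flush_l1d)
		model_l1d_flush(vcpu);
}

int main(void)
{
	/* armed, e.g. on entry to vcpu_run() or by an unsafe VMEXIT handler */
	struct vcpu_model vcpu = { .l1tf_flush_l1d = true, .l1d_flush = 0 };

	model_vmenter(&vcpu, true);	/* flushes, then disarms (conditional mode) */
	model_vmenter(&vcpu, true);	/* no flush: nothing re-armed the flag */
	printf("l1d_flush count: %lu\n", vcpu.l1d_flush);	/* prints 1 */
	return 0;
}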

[ tglx: Split out from combo patch and reduced to a single flag ]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

CVE-2018-3620
CVE-2018-3646

[smb: Moved the change to kvm/mmu.c (kvm_handle_page_fault) into kvm/vmx.c,
      before the call to kvm_mmu_page_fault(). Left kvm/svm.c unmodified,
      as AMD is not said to be affected.]
Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
parent 37329e72
@@ -614,6 +614,9 @@ struct kvm_vcpu_arch {
 	int pending_ioapic_eoi;
 	int pending_external_vector;
+
+	/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
+	bool l1tf_flush_l1d;
 };
 
 struct kvm_lpage_info {
@@ -754,6 +757,7 @@ struct kvm_vcpu_stat {
 	u32 signal_exits;
 	u32 irq_window_exits;
 	u32 nmi_window_exits;
+	u32 l1d_flush;
 	u32 halt_exits;
 	u32 halt_successful_poll;
 	u32 halt_attempted_poll;
......
@@ -5401,6 +5401,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 		/* EPT won't cause page fault directly */
 		BUG_ON(enable_ept);
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
+		vcpu->arch.l1tf_flush_l1d = true;
 		trace_kvm_page_fault(cr2, error_code);
 		if (kvm_event_needs_reinjection(vcpu))
@@ -8279,9 +8280,20 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 #define L1D_CACHE_ORDER 4
 static void *vmx_l1d_flush_pages;
 
-static void __maybe_unused vmx_l1d_flush(void)
+static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
 {
 	int size = PAGE_SIZE << L1D_CACHE_ORDER;
+	bool always;
+
+	/*
+	 * If the mitigation mode is 'flush always', keep the flush bit
+	 * set, otherwise clear it. It gets set again either from
+	 * vcpu_run() or from one of the unsafe VMEXIT handlers.
+	 */
+	always = vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS;
+	vcpu->arch.l1tf_flush_l1d = always;
+
+	vcpu->stat.l1d_flush++;
 
 	if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
 		wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
@@ -8523,6 +8535,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 			[ss]"i"(__KERNEL_DS),
 			[cs]"i"(__KERNEL_CS)
 			);
+		vcpu->arch.l1tf_flush_l1d = true;
 	} else
 		local_irq_enable();
 }
@@ -8736,6 +8749,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	x86_spec_ctrl_set_guest(vcpu->arch.spec_ctrl, 0);
 
+	if (static_branch_unlikely(&vmx_l1d_should_flush)) {
+		if (vcpu->arch.l1tf_flush_l1d)
+			vmx_l1d_flush(vcpu);
+	}
+
 	vmx->__launched = vmx->loaded_vmcs->launched;
 	asm(
 		/* Store host registers */
@@ -10188,6 +10206,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	vmcs12->launch_state = 1;
 
+	/* Hide L1D cache contents from the nested guest. */
+	vmx->vcpu.arch.l1tf_flush_l1d = true;
+
 	if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
 		return kvm_vcpu_halt(vcpu);
......
@@ -169,6 +169,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
 	{ "irq_injections", VCPU_STAT(irq_injections) },
 	{ "nmi_injections", VCPU_STAT(nmi_injections) },
+	{ "l1d_flush", VCPU_STAT(l1d_flush) },
 	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
 	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
 	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -4351,6 +4352,9 @@ static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *v
 int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
 				unsigned int bytes, struct x86_exception *exception)
 {
+	/* kvm_write_guest_virt_system can pull in tons of pages. */
+	vcpu->arch.l1tf_flush_l1d = true;
+
 	return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
 					   PFERR_WRITE_MASK, exception);
 }
@@ -5449,6 +5453,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 	bool writeback = true;
 	bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
 
+	vcpu->arch.l1tf_flush_l1d = true;
+
 	/*
 	 * Clear write_fault_to_shadow_pgtable here to ensure it is
 	 * never reused.
@@ -6785,6 +6791,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 	struct kvm *kvm = vcpu->kvm;
 
 	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+	vcpu->arch.l1tf_flush_l1d = true;
 
 	for (;;) {
 		if (kvm_vcpu_running(vcpu)) {
@@ -7755,6 +7762,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 
 void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
+	vcpu->arch.l1tf_flush_l1d = true;
 	kvm_x86_ops->sched_in(vcpu, cpu);
 }
......