Commit 09e3e2a1, authored by Sean Christopherson and committed by Paolo Bonzini

KVM: x86: Add kvm_x86_ops hook to short circuit emulation

Replace the existing kvm_x86_ops.need_emulation_on_page_fault() with a
more generic can_emulate_instruction(), and unconditionally call the new
function in x86_emulate_instruction().

KVM will use the generic hook to support multiple security related
technologies that prevent emulation in one way or another.  Similar to
the existing AMD #NPF case where emulation of the current instruction is
not possible due to lack of information, AMD's SEV-ES and Intel's SGX
and TDX will introduce scenarios where emulation is impossible due to
the guest's register state being inaccessible.  And again similar to the
existing #NPF case, emulation can be initiated by kvm_mmu_page_fault(),
i.e. outside of the control of vendor-specific code.

While the cause and architecturally visible behavior of the various
cases are different, e.g. SGX will inject a #UD, AMD #NPF is a clean
resume or complete shutdown, and SEV-ES and TDX "return" an error, the
impact on the common emulation code is identical: KVM must stop
emulation immediately and resume the guest.

Query can_emulate_instruction() in handle_ud() as well so that the
force_emulation_prefix code doesn't incorrectly modify RIP before
calling emulate_instruction() in the absurdly unlikely scenario that
KVM encounters forced emulation in conjunction with "do not emulate".

Cc: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Message-Id: <20200915232702.15945-1-sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent ae5a2a39
...@@ -1221,7 +1221,7 @@ struct kvm_x86_ops { ...@@ -1221,7 +1221,7 @@ struct kvm_x86_ops {
int (*get_msr_feature)(struct kvm_msr_entry *entry); int (*get_msr_feature)(struct kvm_msr_entry *entry);
bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu); bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, void *insn, int insn_len);
bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
......
...@@ -5485,18 +5485,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, ...@@ -5485,18 +5485,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu)) if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu))
emulation_type |= EMULTYPE_ALLOW_RETRY_PF; emulation_type |= EMULTYPE_ALLOW_RETRY_PF;
emulate: emulate:
/*
* On AMD platforms, under certain conditions insn_len may be zero on #NPF.
* This can happen if a guest gets a page-fault on data access but the HW
* table walker is not able to read the instruction page (e.g instruction
* page is not present in memory). In those cases we simply restart the
* guest, with the exception of AMD Erratum 1096 which is unrecoverable.
*/
if (unlikely(insn && !insn_len)) {
if (!kvm_x86_ops.need_emulation_on_page_fault(vcpu))
return 1;
}
return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn, return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn,
insn_len); insn_len);
} }
......
...@@ -3984,19 +3984,10 @@ static void enable_smi_window(struct kvm_vcpu *vcpu) ...@@ -3984,19 +3984,10 @@ static void enable_smi_window(struct kvm_vcpu *vcpu)
} }
} }
static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len)
{ {
unsigned long cr4 = kvm_read_cr4(vcpu); bool smep, smap, is_user;
bool smep = cr4 & X86_CR4_SMEP; unsigned long cr4;
bool smap = cr4 & X86_CR4_SMAP;
bool is_user = svm_get_cpl(vcpu) == 3;
/*
* If RIP is invalid, go ahead with emulation which will cause an
* internal error exit.
*/
if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
return true;
/* /*
* Detect and workaround Errata 1096 Fam_17h_00_0Fh. * Detect and workaround Errata 1096 Fam_17h_00_0Fh.
...@@ -4038,6 +4029,20 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) ...@@ -4038,6 +4029,20 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
* instruction pointer so we will not able to workaround it. Lets * instruction pointer so we will not able to workaround it. Lets
* print the error and request to kill the guest. * print the error and request to kill the guest.
*/ */
if (likely(!insn || insn_len))
return true;
/*
* If RIP is invalid, go ahead with emulation which will cause an
* internal error exit.
*/
if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
return true;
cr4 = kvm_read_cr4(vcpu);
smep = cr4 & X86_CR4_SMEP;
smap = cr4 & X86_CR4_SMAP;
is_user = svm_get_cpl(vcpu) == 3;
if (smap && (!smep || is_user)) { if (smap && (!smep || is_user)) {
if (!sev_guest(vcpu->kvm)) if (!sev_guest(vcpu->kvm))
return true; return true;
...@@ -4199,7 +4204,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { ...@@ -4199,7 +4204,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.mem_enc_reg_region = svm_register_enc_region, .mem_enc_reg_region = svm_register_enc_region,
.mem_enc_unreg_region = svm_unregister_enc_region, .mem_enc_unreg_region = svm_unregister_enc_region,
.need_emulation_on_page_fault = svm_need_emulation_on_page_fault, .can_emulate_instruction = svm_can_emulate_instruction,
.apic_init_signal_blocked = svm_apic_init_signal_blocked, .apic_init_signal_blocked = svm_apic_init_signal_blocked,
}; };
......
...@@ -1561,6 +1561,11 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) ...@@ -1561,6 +1561,11 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
return 0; return 0;
} }
static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len)
{
return true;
}
static int skip_emulated_instruction(struct kvm_vcpu *vcpu) static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
{ {
unsigned long rip, orig_rip; unsigned long rip, orig_rip;
...@@ -7749,11 +7754,6 @@ static void enable_smi_window(struct kvm_vcpu *vcpu) ...@@ -7749,11 +7754,6 @@ static void enable_smi_window(struct kvm_vcpu *vcpu)
/* RSM will cause a vmexit anyway. */ /* RSM will cause a vmexit anyway. */
} }
static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
{
return false;
}
static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
{ {
return to_vmx(vcpu)->nested.vmxon; return to_vmx(vcpu)->nested.vmxon;
...@@ -7908,7 +7908,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { ...@@ -7908,7 +7908,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.pre_leave_smm = vmx_pre_leave_smm, .pre_leave_smm = vmx_pre_leave_smm,
.enable_smi_window = enable_smi_window, .enable_smi_window = enable_smi_window,
.need_emulation_on_page_fault = vmx_need_emulation_on_page_fault, .can_emulate_instruction = vmx_can_emulate_instruction,
.apic_init_signal_blocked = vmx_apic_init_signal_blocked, .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
.migrate_timers = vmx_migrate_timers, .migrate_timers = vmx_migrate_timers,
}; };
......
...@@ -3222,8 +3222,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -3222,8 +3222,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
* even when not intercepted. AMD manual doesn't explicitly * even when not intercepted. AMD manual doesn't explicitly
* state this but appears to behave the same. * state this but appears to behave the same.
* *
* However when userspace wants to read this MSR, we should * Unconditionally return L1's TSC offset on userspace reads
* return it's real L1 value so that its restore will be correct. * so that userspace reads and writes always operate on L1's
* offset, e.g. to ensure deterministic behavior for migration.
*/ */
u64 tsc_offset = msr_info->host_initiated ? vcpu->arch.l1_tsc_offset : u64 tsc_offset = msr_info->host_initiated ? vcpu->arch.l1_tsc_offset :
vcpu->arch.tsc_offset; vcpu->arch.tsc_offset;
...@@ -5714,6 +5715,9 @@ int handle_ud(struct kvm_vcpu *vcpu) ...@@ -5714,6 +5715,9 @@ int handle_ud(struct kvm_vcpu *vcpu)
char sig[5]; /* ud2; .ascii "kvm" */ char sig[5]; /* ud2; .ascii "kvm" */
struct x86_exception e; struct x86_exception e;
if (unlikely(!kvm_x86_ops.can_emulate_instruction(vcpu, NULL, 0)))
return 1;
if (force_emulation_prefix && if (force_emulation_prefix &&
kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu), kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
sig, sizeof(sig), &e) == 0 && sig, sizeof(sig), &e) == 0 &&
...@@ -6919,7 +6923,10 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, ...@@ -6919,7 +6923,10 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int r; int r;
struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
bool writeback = true; bool writeback = true;
bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable; bool write_fault_to_spt;
if (unlikely(!kvm_x86_ops.can_emulate_instruction(vcpu, insn, insn_len)))
return 1;
vcpu->arch.l1tf_flush_l1d = true; vcpu->arch.l1tf_flush_l1d = true;
...@@ -6927,6 +6934,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, ...@@ -6927,6 +6934,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
* Clear write_fault_to_shadow_pgtable here to ensure it is * Clear write_fault_to_shadow_pgtable here to ensure it is
* never reused. * never reused.
*/ */
write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
vcpu->arch.write_fault_to_shadow_pgtable = false; vcpu->arch.write_fault_to_shadow_pgtable = false;
kvm_clear_exception_queue(vcpu); kvm_clear_exception_queue(vcpu);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment