Commit fa4c027a authored by Santosh Shukla's avatar Santosh Shukla Committed by Sean Christopherson

KVM: x86: Add support for SVM's Virtual NMI

Add support for SVM's Virtual NMIs implementation, which adds proper
tracking of virtual NMI blocking, and an intr_ctrl flag that software can
set to mark a virtual NMI as pending.  Pending virtual NMIs are serviced
by hardware if/when virtual NMIs become unblocked, i.e. act more or less
like real NMIs.

Introduce two new kvm_x86_ops callbacks so to support SVM's vNMI, as KVM
needs to treat a pending vNMI as partially injected.  Specifically, if
two NMIs (for L1) arrive concurrently in KVM's software model, KVM's ABI
is to inject one and pend the other.  Without vNMI, KVM manually tracks
the pending NMI and uses NMI windows to detect when the NMI should be
injected.

With vNMI, the pending NMI is simply stuffed into the VMCB and handed
off to hardware.  This means that KVM needs to be able to set a vNMI
pending on-demand, and also query if a vNMI is pending, e.g. to honor the
"at most one NMI pending" rule and to preserve all NMIs across save and
restore.

Warn if KVM attempts to open an NMI window when vNMI is fully enabled,
as the above logic should prevent KVM from ever getting to
kvm_check_and_inject_events() with two NMIs pending _in software_, and
the "at most one NMI pending" logic should prevent having an NMI pending
in hardware and an NMI pending in software if NMIs are also blocked, i.e.
if KVM can't immediately inject the second NMI.
Signed-off-by: default avatarSantosh Shukla <Santosh.Shukla@amd.com>
Co-developed-by: default avatarMaxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: default avatarMaxim Levitsky <mlevitsk@redhat.com>
Link: https://lore.kernel.org/r/20230227084016.3368-11-santosh.shukla@amd.com
[sean: rewrite shortlog and changelog, massage code comments]
Signed-off-by: default avatarSean Christopherson <seanjc@google.com>
parent bdedff26
...@@ -68,6 +68,8 @@ KVM_X86_OP(get_interrupt_shadow) ...@@ -68,6 +68,8 @@ KVM_X86_OP(get_interrupt_shadow)
KVM_X86_OP(patch_hypercall) KVM_X86_OP(patch_hypercall)
KVM_X86_OP(inject_irq) KVM_X86_OP(inject_irq)
KVM_X86_OP(inject_nmi) KVM_X86_OP(inject_nmi)
KVM_X86_OP_OPTIONAL_RET0(is_vnmi_pending)
KVM_X86_OP_OPTIONAL_RET0(set_vnmi_pending)
KVM_X86_OP(inject_exception) KVM_X86_OP(inject_exception)
KVM_X86_OP(cancel_injection) KVM_X86_OP(cancel_injection)
KVM_X86_OP(interrupt_allowed) KVM_X86_OP(interrupt_allowed)
......
...@@ -876,7 +876,8 @@ struct kvm_vcpu_arch { ...@@ -876,7 +876,8 @@ struct kvm_vcpu_arch {
u64 tsc_scaling_ratio; /* current scaling ratio */ u64 tsc_scaling_ratio; /* current scaling ratio */
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
unsigned nmi_pending; /* NMI queued after currently running handler */ /* Number of NMIs pending injection, not including hardware vNMIs. */
unsigned int nmi_pending;
bool nmi_injected; /* Trying to inject an NMI this entry */ bool nmi_injected; /* Trying to inject an NMI this entry */
bool smi_pending; /* SMI queued after currently running handler */ bool smi_pending; /* SMI queued after currently running handler */
u8 handling_intr_from_guest; u8 handling_intr_from_guest;
...@@ -1621,6 +1622,13 @@ struct kvm_x86_ops { ...@@ -1621,6 +1622,13 @@ struct kvm_x86_ops {
int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
/* Whether or not a virtual NMI is pending in hardware. */
bool (*is_vnmi_pending)(struct kvm_vcpu *vcpu);
/*
* Attempt to pend a virtual NMI in harware. Returns %true on success
* to allow using static_call_ret0 as the fallback.
*/
bool (*set_vnmi_pending)(struct kvm_vcpu *vcpu);
void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
...@@ -2005,6 +2013,7 @@ int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level); ...@@ -2005,6 +2013,7 @@ int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
void kvm_inject_nmi(struct kvm_vcpu *vcpu); void kvm_inject_nmi(struct kvm_vcpu *vcpu);
int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);
void kvm_update_dr7(struct kvm_vcpu *vcpu); void kvm_update_dr7(struct kvm_vcpu *vcpu);
......
...@@ -230,6 +230,8 @@ module_param(dump_invalid_vmcb, bool, 0644); ...@@ -230,6 +230,8 @@ module_param(dump_invalid_vmcb, bool, 0644);
bool intercept_smi = true; bool intercept_smi = true;
module_param(intercept_smi, bool, 0444); module_param(intercept_smi, bool, 0444);
bool vnmi = true;
module_param(vnmi, bool, 0444);
static bool svm_gp_erratum_intercept = true; static bool svm_gp_erratum_intercept = true;
...@@ -1311,6 +1313,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu) ...@@ -1311,6 +1313,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
if (kvm_vcpu_apicv_active(vcpu)) if (kvm_vcpu_apicv_active(vcpu))
avic_init_vmcb(svm, vmcb); avic_init_vmcb(svm, vmcb);
if (vnmi)
svm->vmcb->control.int_ctl |= V_NMI_ENABLE_MASK;
if (vgif) { if (vgif) {
svm_clr_intercept(svm, INTERCEPT_STGI); svm_clr_intercept(svm, INTERCEPT_STGI);
svm_clr_intercept(svm, INTERCEPT_CLGI); svm_clr_intercept(svm, INTERCEPT_CLGI);
...@@ -3525,6 +3530,39 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu) ...@@ -3525,6 +3530,39 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
++vcpu->stat.nmi_injections; ++vcpu->stat.nmi_injections;
} }
static bool svm_is_vnmi_pending(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (!is_vnmi_enabled(svm))
return false;
return !!(svm->vmcb->control.int_ctl & V_NMI_BLOCKING_MASK);
}
static bool svm_set_vnmi_pending(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (!is_vnmi_enabled(svm))
return false;
if (svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK)
return false;
svm->vmcb->control.int_ctl |= V_NMI_PENDING_MASK;
vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
/*
* Because the pending NMI is serviced by hardware, KVM can't know when
* the NMI is "injected", but for all intents and purposes, passing the
* NMI off to hardware counts as injection.
*/
++vcpu->stat.nmi_injections;
return true;
}
static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected) static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
...@@ -3620,6 +3658,35 @@ static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) ...@@ -3620,6 +3658,35 @@ static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
svm_set_intercept(svm, INTERCEPT_CR8_WRITE); svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
} }
static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (is_vnmi_enabled(svm))
return svm->vmcb->control.int_ctl & V_NMI_BLOCKING_MASK;
else
return svm->nmi_masked;
}
static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (is_vnmi_enabled(svm)) {
if (masked)
svm->vmcb->control.int_ctl |= V_NMI_BLOCKING_MASK;
else
svm->vmcb->control.int_ctl &= ~V_NMI_BLOCKING_MASK;
} else {
svm->nmi_masked = masked;
if (masked)
svm_set_iret_intercept(svm);
else
svm_clr_iret_intercept(svm);
}
}
bool svm_nmi_blocked(struct kvm_vcpu *vcpu) bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
...@@ -3631,8 +3698,10 @@ bool svm_nmi_blocked(struct kvm_vcpu *vcpu) ...@@ -3631,8 +3698,10 @@ bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
if (is_guest_mode(vcpu) && nested_exit_on_nmi(svm)) if (is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
return false; return false;
return (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || if (svm_get_nmi_mask(vcpu))
svm->nmi_masked; return true;
return vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK;
} }
static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection) static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
...@@ -3650,24 +3719,6 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection) ...@@ -3650,24 +3719,6 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return 1; return 1;
} }
static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
{
return to_svm(vcpu)->nmi_masked;
}
static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (masked) {
svm->nmi_masked = true;
svm_set_iret_intercept(svm);
} else {
svm->nmi_masked = false;
svm_clr_iret_intercept(svm);
}
}
bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
...@@ -3748,7 +3799,16 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu) ...@@ -3748,7 +3799,16 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
if (svm->nmi_masked && !svm->awaiting_iret_completion) /*
* KVM should never request an NMI window when vNMI is enabled, as KVM
* allows at most one to-be-injected NMI and one pending NMI, i.e. if
* two NMIs arrive simultaneously, KVM will inject one and set
* V_NMI_PENDING for the other. WARN, but continue with the standard
* single-step approach to try and salvage the pending NMI.
*/
WARN_ON_ONCE(is_vnmi_enabled(svm));
if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
return; /* IRET will cause a vm exit */ return; /* IRET will cause a vm exit */
if (!gif_set(svm)) { if (!gif_set(svm)) {
...@@ -4797,6 +4857,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { ...@@ -4797,6 +4857,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.patch_hypercall = svm_patch_hypercall, .patch_hypercall = svm_patch_hypercall,
.inject_irq = svm_inject_irq, .inject_irq = svm_inject_irq,
.inject_nmi = svm_inject_nmi, .inject_nmi = svm_inject_nmi,
.is_vnmi_pending = svm_is_vnmi_pending,
.set_vnmi_pending = svm_set_vnmi_pending,
.inject_exception = svm_inject_exception, .inject_exception = svm_inject_exception,
.cancel_injection = svm_cancel_injection, .cancel_injection = svm_cancel_injection,
.interrupt_allowed = svm_interrupt_allowed, .interrupt_allowed = svm_interrupt_allowed,
...@@ -5090,6 +5152,16 @@ static __init int svm_hardware_setup(void) ...@@ -5090,6 +5152,16 @@ static __init int svm_hardware_setup(void)
pr_info("Virtual GIF supported\n"); pr_info("Virtual GIF supported\n");
} }
vnmi = vgif && vnmi && boot_cpu_has(X86_FEATURE_VNMI);
if (vnmi)
pr_info("Virtual NMI enabled\n");
if (!vnmi) {
svm_x86_ops.is_vnmi_pending = NULL;
svm_x86_ops.set_vnmi_pending = NULL;
}
if (lbrv) { if (lbrv) {
if (!boot_cpu_has(X86_FEATURE_LBRV)) if (!boot_cpu_has(X86_FEATURE_LBRV))
lbrv = false; lbrv = false;
......
...@@ -36,6 +36,7 @@ extern bool npt_enabled; ...@@ -36,6 +36,7 @@ extern bool npt_enabled;
extern int vgif; extern int vgif;
extern bool intercept_smi; extern bool intercept_smi;
extern bool x2avic_enabled; extern bool x2avic_enabled;
extern bool vnmi;
/* /*
* Clean bits in VMCB. * Clean bits in VMCB.
...@@ -548,6 +549,27 @@ static inline bool is_x2apic_msrpm_offset(u32 offset) ...@@ -548,6 +549,27 @@ static inline bool is_x2apic_msrpm_offset(u32 offset)
(msr < (APIC_BASE_MSR + 0x100)); (msr < (APIC_BASE_MSR + 0x100));
} }
static inline struct vmcb *get_vnmi_vmcb_l1(struct vcpu_svm *svm)
{
if (!vnmi)
return NULL;
if (is_guest_mode(&svm->vcpu))
return NULL;
else
return svm->vmcb01.ptr;
}
static inline bool is_vnmi_enabled(struct vcpu_svm *svm)
{
struct vmcb *vmcb = get_vnmi_vmcb_l1(svm);
if (vmcb)
return !!(vmcb->control.int_ctl & V_NMI_ENABLE_MASK);
else
return false;
}
/* svm.c */ /* svm.c */
#define MSR_INVALID 0xffffffffU #define MSR_INVALID 0xffffffffU
......
...@@ -5125,7 +5125,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, ...@@ -5125,7 +5125,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
events->interrupt.shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu); events->interrupt.shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
events->nmi.injected = vcpu->arch.nmi_injected; events->nmi.injected = vcpu->arch.nmi_injected;
events->nmi.pending = vcpu->arch.nmi_pending; events->nmi.pending = kvm_get_nr_pending_nmis(vcpu);
events->nmi.masked = static_call(kvm_x86_get_nmi_mask)(vcpu); events->nmi.masked = static_call(kvm_x86_get_nmi_mask)(vcpu);
/* events->sipi_vector is never valid when reporting to user space */ /* events->sipi_vector is never valid when reporting to user space */
...@@ -10158,13 +10158,31 @@ static void process_nmi(struct kvm_vcpu *vcpu) ...@@ -10158,13 +10158,31 @@ static void process_nmi(struct kvm_vcpu *vcpu)
else else
limit = 2; limit = 2;
/*
* Adjust the limit to account for pending virtual NMIs, which aren't
* tracked in vcpu->arch.nmi_pending.
*/
if (static_call(kvm_x86_is_vnmi_pending)(vcpu))
limit--;
vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0); vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit); vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
if (vcpu->arch.nmi_pending &&
(static_call(kvm_x86_set_vnmi_pending)(vcpu)))
vcpu->arch.nmi_pending--;
if (vcpu->arch.nmi_pending) if (vcpu->arch.nmi_pending)
kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_make_request(KVM_REQ_EVENT, vcpu);
} }
/* Return total number of NMIs pending injection to the VM */
int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu)
{
return vcpu->arch.nmi_pending +
static_call(kvm_x86_is_vnmi_pending)(vcpu);
}
void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
unsigned long *vcpu_bitmap) unsigned long *vcpu_bitmap)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment