Commit f15a87c0 authored by Paolo Bonzini's avatar Paolo Bonzini

Merge branch 'kvm-lapic-fix-and-cleanup' into HEAD

The first half or so patches fix semi-urgent, real-world relevant APICv
and AVIC bugs.

The second half fixes a variety of AVIC and optimized APIC map bugs
where KVM doesn't play nice with various edge cases that are
architecturally legal(ish), but are unlikely to occur in most real world
scenarios
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parents dc7c31e9 72c70cee
......@@ -37,3 +37,14 @@ Nested virtualization features
------------------------------
TBD
x2APIC
------
When KVM_X2APIC_API_USE_32BIT_IDS is enabled, KVM activates a hack/quirk that
allows sending events to a single vCPU using its x2APIC ID even if the target
vCPU has legacy xAPIC enabled, e.g. to bring up hotplugged vCPUs via INIT-SIPI
on VMs with > 255 vCPUs. A side effect of the quirk is that, if multiple vCPUs
have the same physical APIC ID, KVM will deliver events targeting that APIC ID
only to the vCPU with the lowest vCPU ID. If KVM_X2APIC_API_USE_32BIT_IDS is
not enabled, KVM follows x86 architecture when processing interrupts (all vCPUs
matching the target APIC ID receive the interrupt).
......@@ -77,7 +77,6 @@ KVM_X86_OP(set_nmi_mask)
KVM_X86_OP(enable_nmi_window)
KVM_X86_OP(enable_irq_window)
KVM_X86_OP_OPTIONAL(update_cr8_intercept)
KVM_X86_OP(check_apicv_inhibit_reasons)
KVM_X86_OP(refresh_apicv_exec_ctrl)
KVM_X86_OP_OPTIONAL(hwapic_irr_update)
KVM_X86_OP_OPTIONAL(hwapic_isr_update)
......
......@@ -1022,19 +1022,30 @@ struct kvm_arch_memory_slot {
};
/*
* We use as the mode the number of bits allocated in the LDR for the
* logical processor ID. It happens that these are all powers of two.
* This makes it is very easy to detect cases where the APICs are
* configured for multiple modes; in that case, we cannot use the map and
* hence cannot use kvm_irq_delivery_to_apic_fast either.
* Track the mode of the optimized logical map, as the rules for decoding the
* destination vary per mode. Enabling the optimized logical map requires all
* software-enabled local APIs to be in the same mode, each addressable APIC to
* be mapped to only one MDA, and each MDA to map to at most one APIC.
*/
#define KVM_APIC_MODE_XAPIC_CLUSTER 4
#define KVM_APIC_MODE_XAPIC_FLAT 8
#define KVM_APIC_MODE_X2APIC 16
enum kvm_apic_logical_mode {
/* All local APICs are software disabled. */
KVM_APIC_MODE_SW_DISABLED,
/* All software enabled local APICs in xAPIC cluster addressing mode. */
KVM_APIC_MODE_XAPIC_CLUSTER,
/* All software enabled local APICs in xAPIC flat addressing mode. */
KVM_APIC_MODE_XAPIC_FLAT,
/* All software enabled local APICs in x2APIC mode. */
KVM_APIC_MODE_X2APIC,
/*
* Optimized map disabled, e.g. not all local APICs in the same logical
* mode, same logical ID assigned to multiple APICs, etc.
*/
KVM_APIC_MODE_MAP_DISABLED,
};
struct kvm_apic_map {
struct rcu_head rcu;
u8 mode;
enum kvm_apic_logical_mode logical_mode;
u32 max_apic_id;
union {
struct kvm_lapic *xapic_flat_map[8];
......@@ -1164,6 +1175,12 @@ enum kvm_apicv_inhibit {
*/
APICV_INHIBIT_REASON_BLOCKIRQ,
/*
* APICv is disabled because not all vCPUs have a 1:1 mapping between
* APIC ID and vCPU, _and_ KVM is not applying its x2APIC hotplug hack.
*/
APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED,
/*
* For simplicity, the APIC acceleration is inhibited
* first time either APIC ID or APIC base are changed by the guest
......@@ -1202,6 +1219,12 @@ enum kvm_apicv_inhibit {
* AVIC is disabled because SEV doesn't support it.
*/
APICV_INHIBIT_REASON_SEV,
/*
* AVIC is disabled because not all vCPUs with a valid LDR have a 1:1
* mapping between logical ID and vCPU.
*/
APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED,
};
struct kvm_arch {
......@@ -1250,10 +1273,11 @@ struct kvm_arch {
struct kvm_apic_map __rcu *apic_map;
atomic_t apic_map_dirty;
/* Protects apic_access_memslot_enabled and apicv_inhibit_reasons */
struct rw_semaphore apicv_update_lock;
bool apic_access_memslot_enabled;
bool apic_access_memslot_inhibited;
/* Protects apicv_inhibit_reasons */
struct rw_semaphore apicv_update_lock;
unsigned long apicv_inhibit_reasons;
gpa_t wall_clock;
......@@ -1602,6 +1626,8 @@ struct kvm_x86_ops {
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason);
const unsigned long required_apicv_inhibits;
bool allow_apicv_in_x2apic_without_x2apic_virtualization;
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(int isr);
......@@ -1976,7 +2002,7 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
bool kvm_apicv_activated(struct kvm *kvm);
bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu);
void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
enum kvm_apicv_inhibit reason, bool set);
void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
......
This diff is collapsed.
......@@ -112,6 +112,8 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
struct dest_map *dest_map);
int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
void kvm_apic_update_apicv(struct kvm_vcpu *vcpu);
int kvm_alloc_apic_access_page(struct kvm *kvm);
void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu);
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map);
......
This diff is collapsed.
......@@ -1104,7 +1104,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
* to benefit from it right away.
*/
if (kvm_apicv_activated(vcpu->kvm))
kvm_vcpu_update_apicv(vcpu);
__kvm_vcpu_update_apicv(vcpu);
return 0;
}
......
......@@ -825,7 +825,7 @@ void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool intercept)
if (intercept == svm->x2avic_msrs_intercepted)
return;
if (avic_mode != AVIC_MODE_X2 ||
if (!x2avic_enabled ||
!apic_x2apic_mode(svm->vcpu.arch.apic))
return;
......@@ -4769,10 +4769,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.enable_nmi_window = svm_enable_nmi_window,
.enable_irq_window = svm_enable_irq_window,
.update_cr8_intercept = svm_update_cr8_intercept,
.set_virtual_apic_mode = avic_set_virtual_apic_mode,
.set_virtual_apic_mode = avic_refresh_virtual_apic_mode,
.refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl,
.check_apicv_inhibit_reasons = avic_check_apicv_inhibit_reasons,
.apicv_post_state_restore = avic_apicv_post_state_restore,
.required_apicv_inhibits = AVIC_REQUIRED_APICV_INHIBITS,
.get_exit_info = svm_get_exit_info,
......@@ -5026,6 +5026,8 @@ static __init int svm_hardware_setup(void)
svm_x86_ops.vcpu_blocking = NULL;
svm_x86_ops.vcpu_unblocking = NULL;
svm_x86_ops.vcpu_get_apicv_inhibit_reasons = NULL;
} else if (!x2avic_enabled) {
svm_x86_ops.allow_apicv_in_x2apic_without_x2apic_virtualization = true;
}
if (vls) {
......
......@@ -35,14 +35,7 @@ extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
extern int vgif;
extern bool intercept_smi;
enum avic_modes {
AVIC_MODE_NONE = 0,
AVIC_MODE_X1,
AVIC_MODE_X2,
};
extern enum avic_modes avic_mode;
extern bool x2avic_enabled;
/*
* Clean bits in VMCB.
......@@ -628,6 +621,21 @@ void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
extern struct kvm_x86_nested_ops svm_nested_ops;
/* avic.c */
#define AVIC_REQUIRED_APICV_INHIBITS \
( \
BIT(APICV_INHIBIT_REASON_DISABLE) | \
BIT(APICV_INHIBIT_REASON_ABSENT) | \
BIT(APICV_INHIBIT_REASON_HYPERV) | \
BIT(APICV_INHIBIT_REASON_NESTED) | \
BIT(APICV_INHIBIT_REASON_IRQWIN) | \
BIT(APICV_INHIBIT_REASON_PIT_REINJ) | \
BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | \
BIT(APICV_INHIBIT_REASON_SEV) | \
BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) | \
BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | \
BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED) | \
BIT(APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED) \
)
bool avic_hardware_setup(struct kvm_x86_ops *ops);
int avic_ga_log_notifier(u32 ga_tag);
......@@ -641,14 +649,13 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void avic_vcpu_put(struct kvm_vcpu *vcpu);
void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason);
int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
void avic_ring_doorbell(struct kvm_vcpu *vcpu);
unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu);
void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu);
/* sev.c */
......
......@@ -3904,39 +3904,6 @@ static void seg_setup(int seg)
vmcs_write32(sf->ar_bytes, ar);
}
static int alloc_apic_access_page(struct kvm *kvm)
{
struct page *page;
void __user *hva;
int ret = 0;
mutex_lock(&kvm->slots_lock);
if (kvm->arch.apic_access_memslot_enabled)
goto out;
hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
if (IS_ERR(hva)) {
ret = PTR_ERR(hva);
goto out;
}
page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
if (is_error_page(page)) {
ret = -EFAULT;
goto out;
}
/*
* Do not pin the page in memory, so that memory hot-unplug
* is able to migrate it.
*/
put_page(page);
kvm->arch.apic_access_memslot_enabled = true;
out:
mutex_unlock(&kvm->slots_lock);
return ret;
}
int allocate_vpid(void)
{
int vpid;
......@@ -7490,7 +7457,7 @@ static int vmx_vcpu_create(struct kvm_vcpu *vcpu)
vmx->loaded_vmcs = &vmx->vmcs01;
if (cpu_need_virtualize_apic_accesses(vcpu)) {
err = alloc_apic_access_page(vcpu->kvm);
err = kvm_alloc_apic_access_page(vcpu->kvm);
if (err)
goto free_vmcs;
}
......@@ -8129,17 +8096,16 @@ static void vmx_hardware_unsetup(void)
free_kvm_area();
}
static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
{
ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
BIT(APICV_INHIBIT_REASON_ABSENT) |
BIT(APICV_INHIBIT_REASON_HYPERV) |
BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
return supported & BIT(reason);
}
#define VMX_REQUIRED_APICV_INHIBITS \
( \
BIT(APICV_INHIBIT_REASON_DISABLE)| \
BIT(APICV_INHIBIT_REASON_ABSENT) | \
BIT(APICV_INHIBIT_REASON_HYPERV) | \
BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | \
BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) | \
BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | \
BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED) \
)
static void vmx_vm_destroy(struct kvm *kvm)
{
......@@ -8225,7 +8191,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
.load_eoi_exitmap = vmx_load_eoi_exitmap,
.apicv_post_state_restore = vmx_apicv_post_state_restore,
.check_apicv_inhibit_reasons = vmx_check_apicv_inhibit_reasons,
.required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS,
.hwapic_irr_update = vmx_hwapic_irr_update,
.hwapic_isr_update = vmx_hwapic_isr_update,
.guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
......
......@@ -10148,7 +10148,7 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm)
kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
}
void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
bool activate;
......@@ -10183,7 +10183,30 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
preempt_enable();
up_read(&vcpu->kvm->arch.apicv_update_lock);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
EXPORT_SYMBOL_GPL(__kvm_vcpu_update_apicv);
static void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
{
if (!lapic_in_kernel(vcpu))
return;
/*
* Due to sharing page tables across vCPUs, the xAPIC memslot must be
* deleted if any vCPU has xAPIC virtualization and x2APIC enabled, but
* and hardware doesn't support x2APIC virtualization. E.g. some AMD
* CPUs support AVIC but not x2APIC. KVM still allows enabling AVIC in
* this case so that KVM can the AVIC doorbell to inject interrupts to
* running vCPUs, but KVM must not create SPTEs for the APIC base as
* the vCPU would incorrectly be able to access the vAPIC page via MMIO
* despite being in x2APIC mode. For simplicity, inhibiting the APIC
* access page is sticky.
*/
if (apic_x2apic_mode(vcpu->arch.apic) &&
kvm_x86_ops.allow_apicv_in_x2apic_without_x2apic_virtualization)
kvm_inhibit_apic_access_page(vcpu);
__kvm_vcpu_update_apicv(vcpu);
}
void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
enum kvm_apicv_inhibit reason, bool set)
......@@ -10192,7 +10215,7 @@ void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
lockdep_assert_held_write(&kvm->arch.apicv_update_lock);
if (!static_call(kvm_x86_check_apicv_inhibit_reasons)(reason))
if (!(kvm_x86_ops.required_apicv_inhibits & BIT(reason)))
return;
old = new = kvm->arch.apicv_inhibit_reasons;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment