Commit ddd5c582 authored by Paolo Bonzini

Merge tag 'kvmarm-fixes-6.12-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm64 fixes for 6.12, take #2

- Fix the guest view of the ID registers, making the relevant fields
  writable from userspace (affecting ID_AA64DFR0_EL1 and ID_AA64PFR1_EL1)

- Correctly expose S1PIE to guests, fixing a regression introduced
  in 6.12-rc1 with the S1POE support

- Fix the recycling of stage-2 shadow MMUs by tracking the context
  (are we allowed to block or not) as well as the recycling state

- Address a couple of issues with the vgic when userspace misconfigures
  the emulation, resulting in various splats. Headaches courtesy
  of our Syzkaller friends
parents 3ec4350d df5fd75e
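
For context on the ID register changes in the diff below: once KVM marks a field as writable, a VMM drives it with the standard KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls before the vCPU first runs. The snippet that follows is only an illustrative sketch and not part of this merge: it assumes an existing vCPU fd obtained from KVM_CREATE_VCPU, relies on the arm64 uapi ARM64_SYS_REG() encoding, and the helper name hide_ssbs_from_guest() is made up. It clears the (now writable) SSBS field of ID_AA64PFR1_EL1 so the guest sees SSBS as not implemented.

/*
 * Hypothetical userspace sketch (not from this commit): hide SSBS from the
 * guest by clearing the SSBS field of its ID_AA64PFR1_EL1 view. The write
 * must happen before the vCPU first runs; error handling is minimal.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int hide_ssbs_from_guest(int vcpu_fd)
{
        uint64_t val;
        struct kvm_one_reg reg = {
                /* ID_AA64PFR1_EL1 is op0=3, op1=0, CRn=0, CRm=4, op2=1 */
                .id   = ARM64_SYS_REG(3, 0, 0, 4, 1),
                .addr = (uint64_t)&val,
        };

        /* Read the sanitised value KVM exposes by default... */
        if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg))
                return -1;

        /* ...clear SSBS (bits [7:4]) and write the result back. */
        val &= ~(UINT64_C(0xf) << 4);
        return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}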
@@ -51,6 +51,7 @@
 #define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5)
 #define KVM_REQ_SUSPEND KVM_ARCH_REQ(6)
 #define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7)
+#define KVM_REQ_NESTED_S2_UNMAP KVM_ARCH_REQ(8)

 #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
                                    KVM_DIRTY_LOG_INITIALLY_SET)
@@ -211,6 +212,12 @@ struct kvm_s2_mmu {
         */
        bool nested_stage2_enabled;

+       /*
+        * true when this MMU needs to be unmapped before being used for a new
+        * purpose.
+        */
+       bool pending_unmap;
+
        /*
         * 0: Nobody is currently using this, check vttbr for validity
         * >0: Somebody is actively using this.
...
@@ -166,7 +166,8 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
 int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
 void __init free_hyp_pgds(void);

-void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size);
+void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start,
+                            u64 size, bool may_block);
 void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end);
 void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end);
...
@@ -78,6 +78,8 @@ extern void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid,
 extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
 extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);

+extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu);
+
 struct kvm_s2_trans {
        phys_addr_t output;
        unsigned long block_size;
@@ -124,7 +126,7 @@ extern int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu,
                                    struct kvm_s2_trans *trans);
 extern int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2);
 extern void kvm_nested_s2_wp(struct kvm *kvm);
-extern void kvm_nested_s2_unmap(struct kvm *kvm);
+extern void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block);
 extern void kvm_nested_s2_flush(struct kvm *kvm);

 unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val);
...
@@ -997,6 +997,9 @@ static int kvm_vcpu_suspend(struct kvm_vcpu *vcpu)
 static int check_vcpu_requests(struct kvm_vcpu *vcpu)
 {
        if (kvm_request_pending(vcpu)) {
+               if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu))
+                       return -EIO;
+
                if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
                        kvm_vcpu_sleep(vcpu);
@@ -1031,6 +1034,8 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
                if (kvm_dirty_ring_check_request(vcpu))
                        return 0;
+
+               check_nested_vcpu_requests(vcpu);
        }

        return 1;
...
@@ -317,7 +317,7 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu)
                         * to the guest, and hide SSBS so that the
                         * guest stays protected.
                         */
-                       if (cpus_have_final_cap(ARM64_SSBS))
+                       if (kvm_has_feat(vcpu->kvm, ID_AA64PFR1_EL1, SSBS, IMP))
                                break;
                        fallthrough;
                case SPECTRE_UNAFFECTED:
@@ -428,7 +428,7 @@ int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
  * Convert the workaround level into an easy-to-compare number, where higher
  * values mean better protection.
  */
-static int get_kernel_wa_level(u64 regid)
+static int get_kernel_wa_level(struct kvm_vcpu *vcpu, u64 regid)
 {
        switch (regid) {
        case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
@@ -449,7 +449,7 @@ static int get_kernel_wa_level(u64 regid)
                         * don't have any FW mitigation if SSBS is there at
                         * all times.
                         */
-                       if (cpus_have_final_cap(ARM64_SSBS))
+                       if (kvm_has_feat(vcpu->kvm, ID_AA64PFR1_EL1, SSBS, IMP))
                                return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
                        fallthrough;
                case SPECTRE_UNAFFECTED:
@@ -486,7 +486,7 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
        case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
        case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
        case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
-               val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
+               val = get_kernel_wa_level(vcpu, reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
                break;
        case KVM_REG_ARM_STD_BMAP:
                val = READ_ONCE(smccc_feat->std_bmap);
@@ -588,7 +588,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
                if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
                        return -EINVAL;

-               if (get_kernel_wa_level(reg->id) < val)
+               if (get_kernel_wa_level(vcpu, reg->id) < val)
                        return -EINVAL;

                return 0;
@@ -624,7 +624,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
                 * We can deal with NOT_AVAIL on NOT_REQUIRED, but not the
                 * other way around.
                 */
-               if (get_kernel_wa_level(reg->id) < wa_level)
+               if (get_kernel_wa_level(vcpu, reg->id) < wa_level)
                        return -EINVAL;

                return 0;
...
@@ -328,9 +328,10 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
                                   may_block));
 }

-void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start,
+                            u64 size, bool may_block)
 {
-       __unmap_stage2_range(mmu, start, size, true);
+       __unmap_stage2_range(mmu, start, size, may_block);
 }

 void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
@@ -1015,7 +1016,7 @@ static void stage2_unmap_memslot(struct kvm *kvm,
                if (!(vma->vm_flags & VM_PFNMAP)) {
                        gpa_t gpa = addr + (vm_start - memslot->userspace_addr);

-                       kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
+                       kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, vm_end - vm_start, true);
                }
                hva = vm_end;
        } while (hva < reg_end);
@@ -1042,7 +1043,7 @@ void stage2_unmap_vm(struct kvm *kvm)
        kvm_for_each_memslot(memslot, bkt, slots)
                stage2_unmap_memslot(kvm, memslot);

-       kvm_nested_s2_unmap(kvm);
+       kvm_nested_s2_unmap(kvm, true);

        write_unlock(&kvm->mmu_lock);
        mmap_read_unlock(current->mm);
@@ -1912,7 +1913,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
                             (range->end - range->start) << PAGE_SHIFT,
                             range->may_block);

-       kvm_nested_s2_unmap(kvm);
+       kvm_nested_s2_unmap(kvm, range->may_block);

        return false;
 }
@@ -2179,8 +2180,8 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
        phys_addr_t size = slot->npages << PAGE_SHIFT;

        write_lock(&kvm->mmu_lock);
-       kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, size);
-       kvm_nested_s2_unmap(kvm);
+       kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, size, true);
+       kvm_nested_s2_unmap(kvm, true);
        write_unlock(&kvm->mmu_lock);
 }
...
@@ -632,9 +632,9 @@ static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu)
                /* Set the scene for the next search */
                kvm->arch.nested_mmus_next = (i + 1) % kvm->arch.nested_mmus_size;

-               /* Clear the old state */
+               /* Make sure we don't forget to do the laundry */
                if (kvm_s2_mmu_valid(s2_mmu))
-                       kvm_stage2_unmap_range(s2_mmu, 0, kvm_phys_size(s2_mmu));
+                       s2_mmu->pending_unmap = true;

        /*
         * The virtual VMID (modulo CnP) will be used as a key when matching
@@ -650,6 +650,16 @@ static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu)
 out:
        atomic_inc(&s2_mmu->refcnt);

+       /*
+        * Set the vCPU request to perform an unmap, even if the pending unmap
+        * originates from another vCPU. This guarantees that the MMU has been
+        * completely unmapped before any vCPU actually uses it, and allows
+        * multiple vCPUs to lend a hand with completing the unmap.
+        */
+       if (s2_mmu->pending_unmap)
+               kvm_make_request(KVM_REQ_NESTED_S2_UNMAP, vcpu);
+
        return s2_mmu;
 }
@@ -663,6 +673,13 @@ void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu)
 void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
 {
+       /*
+        * The vCPU kept its reference on the MMU after the last put, keep
+        * rolling with it.
+        */
+       if (vcpu->arch.hw_mmu)
+               return;
+
        if (is_hyp_ctxt(vcpu)) {
                vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
        } else {
@@ -674,10 +691,18 @@ void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
 void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu)
 {
-       if (kvm_is_nested_s2_mmu(vcpu->kvm, vcpu->arch.hw_mmu)) {
+       /*
+        * Keep a reference on the associated stage-2 MMU if the vCPU is
+        * scheduling out and not in WFI emulation, suggesting it is likely to
+        * reuse the MMU sometime soon.
+        */
+       if (vcpu->scheduled_out && !vcpu_get_flag(vcpu, IN_WFI))
+               return;
+
+       if (kvm_is_nested_s2_mmu(vcpu->kvm, vcpu->arch.hw_mmu))
                atomic_dec(&vcpu->arch.hw_mmu->refcnt);
-               vcpu->arch.hw_mmu = NULL;
-       }
+
+       vcpu->arch.hw_mmu = NULL;
 }

 /*
@@ -730,7 +755,7 @@ void kvm_nested_s2_wp(struct kvm *kvm)
        }
 }

-void kvm_nested_s2_unmap(struct kvm *kvm)
+void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block)
 {
        int i;
@@ -740,7 +765,7 @@ void kvm_nested_s2_unmap(struct kvm *kvm)
                struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];

                if (kvm_s2_mmu_valid(mmu))
-                       kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu));
+                       kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu), may_block);
        }
 }
@@ -1184,3 +1209,17 @@ int kvm_init_nv_sysregs(struct kvm *kvm)

        return 0;
 }
+
+void check_nested_vcpu_requests(struct kvm_vcpu *vcpu)
+{
+       if (kvm_check_request(KVM_REQ_NESTED_S2_UNMAP, vcpu)) {
+               struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu;
+
+               write_lock(&vcpu->kvm->mmu_lock);
+               if (mmu->pending_unmap) {
+                       kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu), true);
+                       mmu->pending_unmap = false;
+               }
+               write_unlock(&vcpu->kvm->mmu_lock);
+       }
+}
...
@@ -1527,6 +1527,14 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
                val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE);
                val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME);
+               val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_RNDR_trap);
+               val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI);
+               val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac);
+               val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS);
+               val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE);
+               val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX);
+               val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_DF2);
+               val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR);
                break;
        case SYS_ID_AA64PFR2_EL1:
                /* We only expose FPMR */
@@ -1550,7 +1558,8 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
                val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK;
                break;
        case SYS_ID_AA64MMFR3_EL1:
-               val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE;
+               val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE |
+                      ID_AA64MMFR3_EL1_S1PIE;
                break;
        case SYS_ID_MMFR4_EL1:
                val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX);
@@ -2376,7 +2385,19 @@ static const struct sys_reg_desc sys_reg_descs[] = {
                                   ID_AA64PFR0_EL1_RAS |
                                   ID_AA64PFR0_EL1_AdvSIMD |
                                   ID_AA64PFR0_EL1_FP), },
-       ID_SANITISED(ID_AA64PFR1_EL1),
+       ID_WRITABLE(ID_AA64PFR1_EL1, ~(ID_AA64PFR1_EL1_PFAR |
+                                      ID_AA64PFR1_EL1_DF2 |
+                                      ID_AA64PFR1_EL1_MTEX |
+                                      ID_AA64PFR1_EL1_THE |
+                                      ID_AA64PFR1_EL1_GCS |
+                                      ID_AA64PFR1_EL1_MTE_frac |
+                                      ID_AA64PFR1_EL1_NMI |
+                                      ID_AA64PFR1_EL1_RNDR_trap |
+                                      ID_AA64PFR1_EL1_SME |
+                                      ID_AA64PFR1_EL1_RES0 |
+                                      ID_AA64PFR1_EL1_MPAM_frac |
+                                      ID_AA64PFR1_EL1_RAS_frac |
+                                      ID_AA64PFR1_EL1_MTE)),
        ID_WRITABLE(ID_AA64PFR2_EL1, ID_AA64PFR2_EL1_FPMR),
        ID_UNALLOCATED(4,3),
        ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0),
@@ -2390,7 +2411,21 @@ static const struct sys_reg_desc sys_reg_descs[] = {
          .get_user = get_id_reg,
          .set_user = set_id_aa64dfr0_el1,
          .reset = read_sanitised_id_aa64dfr0_el1,
-         .val = ID_AA64DFR0_EL1_PMUVer_MASK |
+         /*
+          * Prior to FEAT_Debugv8.9, the architecture defines context-aware
+          * breakpoints (CTX_CMPs) as the highest numbered breakpoints (BRPs).
+          * KVM does not trap + emulate the breakpoint registers, and as such
+          * cannot support a layout that misaligns with the underlying hardware.
+          * While it may be possible to describe a subset that aligns with
+          * hardware, just prevent changes to BRPs and CTX_CMPs altogether for
+          * simplicity.
+          *
+          * See DDI0487K.a, section D2.8.3 Breakpoint types and linking
+          * of breakpoints for more details.
+          */
+         .val = ID_AA64DFR0_EL1_DoubleLock_MASK |
+                ID_AA64DFR0_EL1_WRPs_MASK |
+                ID_AA64DFR0_EL1_PMUVer_MASK |
                 ID_AA64DFR0_EL1_DebugVer_MASK, },
        ID_SANITISED(ID_AA64DFR1_EL1),
        ID_UNALLOCATED(5,2),
@@ -2433,6 +2468,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
                                       ID_AA64MMFR2_EL1_NV |
                                       ID_AA64MMFR2_EL1_CCIDX)),
        ID_WRITABLE(ID_AA64MMFR3_EL1, (ID_AA64MMFR3_EL1_TCRX |
+                                      ID_AA64MMFR3_EL1_S1PIE |
                                       ID_AA64MMFR3_EL1_S1POE)),
        ID_SANITISED(ID_AA64MMFR4_EL1),
        ID_UNALLOCATED(7,5),
@@ -2903,7 +2939,7 @@ static bool handle_alle1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
         * Drop all shadow S2s, resulting in S1/S2 TLBIs for each of the
         * corresponding VMIDs.
         */
-       kvm_nested_s2_unmap(vcpu->kvm);
+       kvm_nested_s2_unmap(vcpu->kvm, true);

        write_unlock(&vcpu->kvm->mmu_lock);
@@ -2955,7 +2991,30 @@ union tlbi_info {
 static void s2_mmu_unmap_range(struct kvm_s2_mmu *mmu,
                               const union tlbi_info *info)
 {
-       kvm_stage2_unmap_range(mmu, info->range.start, info->range.size);
+       /*
+        * The unmap operation is allowed to drop the MMU lock and block, which
+        * means that @mmu could be used for a different context than the one
+        * currently being invalidated.
+        *
+        * This behavior is still safe, as:
+        *
+        *  1) The vCPU(s) that recycled the MMU are responsible for invalidating
+        *     the entire MMU before reusing it, which still honors the intent
+        *     of a TLBI.
+        *
+        *  2) Until the guest TLBI instruction is 'retired' (i.e. increment PC
+        *     and ERET to the guest), other vCPUs are allowed to use stale
+        *     translations.
+        *
+        *  3) Accidentally unmapping an unrelated MMU context is nonfatal, and
+        *     at worst may cause more aborts for shadow stage-2 fills.
+        *
+        * Dropping the MMU lock also implies that shadow stage-2 fills could
+        * happen behind the back of the TLBI. This is still safe, though, as
+        * the L1 needs to put its stage-2 in a consistent state before doing
+        * the TLBI.
+        */
+       kvm_stage2_unmap_range(mmu, info->range.start, info->range.size, true);
 }

 static bool handle_vmalls12e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
@@ -3050,7 +3109,11 @@ static void s2_mmu_unmap_ipa(struct kvm_s2_mmu *mmu,
        max_size = compute_tlb_inval_range(mmu, info->ipa.addr);
        base_addr &= ~(max_size - 1);

-       kvm_stage2_unmap_range(mmu, base_addr, max_size);
+       /*
+        * See comment in s2_mmu_unmap_range() for why this is allowed to
+        * reschedule.
+        */
+       kvm_stage2_unmap_range(mmu, base_addr, max_size, true);
 }

 static bool handle_ipas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
...
@@ -417,8 +417,28 @@ static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
        kfree(vgic_cpu->private_irqs);
        vgic_cpu->private_irqs = NULL;

-       if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+       if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+               /*
+                * If this vCPU is being destroyed because of a failed creation
+                * then unregister the redistributor to avoid leaving behind a
+                * dangling pointer to the vCPU struct.
+                *
+                * vCPUs that have been successfully created (i.e. added to
+                * kvm->vcpu_array) get unregistered in kvm_vgic_destroy(), as
+                * this function gets called while holding kvm->arch.config_lock
+                * in the VM teardown path and would otherwise introduce a lock
+                * inversion w.r.t. kvm->srcu.
+                *
+                * vCPUs that failed creation are torn down outside of the
+                * kvm->arch.config_lock and do not get unregistered in
+                * kvm_vgic_destroy(), meaning it is both safe and necessary to
+                * do so here.
+                */
+               if (kvm_get_vcpu_by_id(vcpu->kvm, vcpu->vcpu_id) != vcpu)
+                       vgic_unregister_redist_iodev(vcpu);
+
                vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
+       }
 }

 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -536,10 +556,10 @@ int kvm_vgic_map_resources(struct kvm *kvm)
 out:
        mutex_unlock(&kvm->arch.config_lock);
 out_slots:
-       mutex_unlock(&kvm->slots_lock);
-
        if (ret)
-               kvm_vgic_destroy(kvm);
+               kvm_vm_dead(kvm);
+
+       mutex_unlock(&kvm->slots_lock);

        return ret;
 }
...
@@ -68,6 +68,8 @@ struct test_feature_reg {
 }

 static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = {
+       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DoubleLock, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, WRPs, 0),
        S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0),
        REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, ID_AA64DFR0_EL1_DebugVer_IMP),
        REG_FTR_END,
@@ -134,6 +136,13 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
        REG_FTR_END,
 };

+static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0),
+       REG_FTR_END,
+};
+
 static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
        REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0),
        REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0),
@@ -200,6 +209,7 @@ static struct test_feature_reg test_regs[] = {
        TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1),
        TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1),
        TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1),
+       TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1),
        TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1),
        TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1),
        TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1),
@@ -569,9 +579,9 @@ int main(void)
        test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) +
                   ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) +
                   ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
-                  ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) +
-                  ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) -
-                  ARRAY_SIZE(test_regs) + 2;
+                  ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) +
+                  ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) +
+                  ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2;

        ksft_set_plan(test_cnt);
...