Commit 675a15f4 authored by Paolo Bonzini's avatar Paolo Bonzini

Merge tag 'kvmarm-fixes-6.5-1' of...

Merge tag 'kvmarm-fixes-6.5-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm64 fixes for 6.5, part #1

 - Avoid pKVM finalization if KVM initialization fails

 - Add missing BTI instructions in the hypervisor, fixing an early boot
   failure on BTI systems

 - Handle MMU notifiers correctly for non hugepage-aligned memslots

 - Work around a bug in the architecture where hypervisor timer controls
   have UNKNOWN behavior under nested virt.

 - Disable preemption in kvm_arch_hardware_enable(), fixing a kernel BUG
   in cpu hotplug resulting from per-CPU accessor sanity checking.

 - Make WFI emulation on GICv4 systems robust w.r.t. preemption,
   consistently requesting a doorbell interrupt on vcpu_put()

 - Uphold RES0 sysreg behavior when emulating older PMU versions

 - Avoid macro expansion when initializing PMU register names, ensuring
   the tracepoints pretty-print the sysreg.
parents fdf0eaf1 9d2a55b4
......@@ -727,6 +727,8 @@ struct kvm_vcpu_arch {
#define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5))
/* PMUSERENR for the guest EL0 is on physical CPU */
#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(6))
/* WFI instruction trapped */
#define IN_WFI __vcpu_single_flag(sflags, BIT(7))
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
......
......@@ -608,22 +608,26 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr);
/**
* kvm_pgtable_stage2_mkold() - Clear the access flag in a page-table entry.
* kvm_pgtable_stage2_test_clear_young() - Test and optionally clear the access
* flag in a page-table entry.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
* @addr: Intermediate physical address to identify the page-table entry.
* @size: Size of the address range to visit.
* @mkold: True if the access flag should be cleared.
*
* The offset of @addr within a page is ignored.
*
* If there is a valid, leaf page-table entry used to translate @addr, then
* clear the access flag in that entry.
* Tests and conditionally clears the access flag for every valid, leaf
* page-table entry used to translate the range [@addr, @addr + @size).
*
* Note that it is the caller's responsibility to invalidate the TLB after
* calling this function to ensure that the updated permissions are visible
* to the CPUs.
*
* Return: The old page-table entry prior to clearing the flag, 0 on failure.
* Return: True if any of the visited PTEs had the access flag set.
*/
kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);
bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
u64 size, bool mkold);
/**
* kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a
......@@ -645,18 +649,6 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
enum kvm_pgtable_prot prot);
/**
* kvm_pgtable_stage2_is_young() - Test whether a page-table entry has the
* access flag set.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
* @addr: Intermediate physical address to identify the page-table entry.
*
* The offset of @addr within a page is ignored.
*
* Return: True if the page-table entry has the access flag set, false otherwise.
*/
bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
/**
* kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point
* of Coherency for guest stage-2 address
......
......@@ -78,6 +78,7 @@ extern u32 __boot_cpu_mode[2];
void __hyp_set_vectors(phys_addr_t phys_vector_base);
void __hyp_reset_vectors(void);
bool is_kvm_arm_initialised(void);
DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
......
......@@ -827,8 +827,8 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
/* This only happens on VHE, so use the CNTKCTL_EL1 accessor */
sysreg_clear_set(cntkctl_el1, clr, set);
/* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
sysreg_clear_set(cnthctl_el2, clr, set);
}
void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
......@@ -1563,7 +1563,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
void kvm_timer_init_vhe(void)
{
if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
sysreg_clear_set(cntkctl_el1, 0, CNTHCTL_ECV);
sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
}
int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
......
......@@ -53,11 +53,16 @@ DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
DECLARE_KVM_NVHE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
static bool vgic_present;
static bool vgic_present, kvm_arm_initialised;
static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
bool is_kvm_arm_initialised(void)
{
return kvm_arm_initialised;
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
......@@ -713,13 +718,15 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
*/
preempt_disable();
kvm_vgic_vmcr_sync(vcpu);
vgic_v4_put(vcpu, true);
vcpu_set_flag(vcpu, IN_WFI);
vgic_v4_put(vcpu);
preempt_enable();
kvm_vcpu_halt(vcpu);
vcpu_clear_flag(vcpu, IN_WFIT);
preempt_disable();
vcpu_clear_flag(vcpu, IN_WFI);
vgic_v4_load(vcpu);
preempt_enable();
}
......@@ -787,7 +794,7 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) {
/* The distributor enable bits were changed */
preempt_disable();
vgic_v4_put(vcpu, false);
vgic_v4_put(vcpu);
vgic_v4_load(vcpu);
preempt_enable();
}
......@@ -1867,8 +1874,17 @@ static void _kvm_arch_hardware_enable(void *discard)
int kvm_arch_hardware_enable(void)
{
int was_enabled = __this_cpu_read(kvm_arm_hardware_enabled);
int was_enabled;
/*
* Most calls to this function are made with migration
* disabled, but not with preemption disabled. The former is
* enough to ensure correctness, but most of the helpers
* expect the later and will throw a tantrum otherwise.
*/
preempt_disable();
was_enabled = __this_cpu_read(kvm_arm_hardware_enabled);
_kvm_arch_hardware_enable(NULL);
if (!was_enabled) {
......@@ -1876,6 +1892,8 @@ int kvm_arch_hardware_enable(void)
kvm_timer_cpu_up();
}
preempt_enable();
return 0;
}
......@@ -2482,6 +2500,8 @@ static __init int kvm_arm_init(void)
if (err)
goto out_subs;
kvm_arm_initialised = true;
return 0;
out_subs:
......
......@@ -154,6 +154,12 @@ SYM_CODE_END(\label)
esb
stp x0, x1, [sp, #-16]!
662:
/*
* spectre vectors __bp_harden_hyp_vecs generate br instructions at runtime
* that jump at offset 8 at __kvm_hyp_vector.
* As hyp .text is guarded section, it needs bti j.
*/
bti j
b \target
check_preamble_length 661b, 662b
......@@ -165,6 +171,8 @@ check_preamble_length 661b, 662b
nop
stp x0, x1, [sp, #-16]!
662:
/* Check valid_vect */
bti j
b \target
check_preamble_length 661b, 662b
......
......@@ -297,3 +297,13 @@ SYM_CODE_START(__kvm_hyp_host_forward_smc)
ret
SYM_CODE_END(__kvm_hyp_host_forward_smc)
/*
* kvm_host_psci_cpu_entry is called through br instruction, which requires
* bti j instruction as compilers (gcc and llvm) doesn't insert bti j for external
* functions, but bti c instead.
*/
SYM_CODE_START(kvm_host_psci_cpu_entry)
bti j
b __kvm_host_psci_cpu_entry
SYM_CODE_END(kvm_host_psci_cpu_entry)
......@@ -200,7 +200,7 @@ static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
__hyp_pa(init_params), 0);
}
asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on)
asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on)
{
struct psci_boot_args *boot_args;
struct kvm_cpu_context *host_ctxt;
......
......@@ -1195,25 +1195,54 @@ kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
return pte;
}
kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
struct stage2_age_data {
bool mkold;
bool young;
};
static int stage2_age_walker(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit)
{
kvm_pte_t pte = 0;
stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
&pte, NULL, 0);
kvm_pte_t new = ctx->old & ~KVM_PTE_LEAF_ATTR_LO_S2_AF;
struct stage2_age_data *data = ctx->arg;
if (!kvm_pte_valid(ctx->old) || new == ctx->old)
return 0;
data->young = true;
/*
* stage2_age_walker() is always called while holding the MMU lock for
* write, so this will always succeed. Nonetheless, this deliberately
* follows the race detection pattern of the other stage-2 walkers in
* case the locking mechanics of the MMU notifiers is ever changed.
*/
if (data->mkold && !stage2_try_set_pte(ctx, new))
return -EAGAIN;
/*
* "But where's the TLBI?!", you scream.
* "Over in the core code", I sigh.
*
* See the '->clear_flush_young()' callback on the KVM mmu notifier.
*/
return pte;
return 0;
}
bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
u64 size, bool mkold)
{
kvm_pte_t pte = 0;
stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL, 0);
return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
struct stage2_age_data data = {
.mkold = mkold,
};
struct kvm_pgtable_walker walker = {
.cb = stage2_age_walker,
.arg = &data,
.flags = KVM_PGTABLE_WALK_LEAF,
};
WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker));
return data.young;
}
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
......
......@@ -1756,27 +1756,25 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
u64 size = (range->end - range->start) << PAGE_SHIFT;
kvm_pte_t kpte;
pte_t pte;
if (!kvm->arch.mmu.pgt)
return false;
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
kpte = kvm_pgtable_stage2_mkold(kvm->arch.mmu.pgt,
range->start << PAGE_SHIFT);
pte = __pte(kpte);
return pte_valid(pte) && pte_young(pte);
return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
range->start << PAGE_SHIFT,
size, true);
}
bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
u64 size = (range->end - range->start) << PAGE_SHIFT;
if (!kvm->arch.mmu.pgt)
return false;
return kvm_pgtable_stage2_is_young(kvm->arch.mmu.pgt,
range->start << PAGE_SHIFT);
return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
range->start << PAGE_SHIFT,
size, false);
}
phys_addr_t kvm_mmu_get_httbr(void)
......
......@@ -244,7 +244,7 @@ static int __init finalize_pkvm(void)
{
int ret;
if (!is_protected_kvm_enabled())
if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
return 0;
/*
......
......@@ -986,7 +986,6 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (p->is_write) {
kvm_pmu_set_counter_event_type(vcpu, p->regval, idx);
__vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK;
kvm_vcpu_pmu_restore_guest(vcpu);
} else {
p->regval = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
......@@ -1115,18 +1114,19 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{ SYS_DESC(SYS_DBGWCRn_EL1(n)), \
trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr }
#define PMU_SYS_REG(r) \
SYS_DESC(r), .reset = reset_pmu_reg, .visibility = pmu_visibility
#define PMU_SYS_REG(name) \
SYS_DESC(SYS_##name), .reset = reset_pmu_reg, \
.visibility = pmu_visibility
/* Macro to expand the PMEVCNTRn_EL0 register */
#define PMU_PMEVCNTR_EL0(n) \
{ PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)), \
{ PMU_SYS_REG(PMEVCNTRn_EL0(n)), \
.reset = reset_pmevcntr, .get_user = get_pmu_evcntr, \
.access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), }
/* Macro to expand the PMEVTYPERn_EL0 register */
#define PMU_PMEVTYPER_EL0(n) \
{ PMU_SYS_REG(SYS_PMEVTYPERn_EL0(n)), \
{ PMU_SYS_REG(PMEVTYPERn_EL0(n)), \
.reset = reset_pmevtyper, \
.access = access_pmu_evtyper, .reg = (PMEVTYPER0_EL0 + n), }
......@@ -2115,9 +2115,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_PMBSR_EL1), undef_access },
/* PMBIDR_EL1 is not trapped */
{ PMU_SYS_REG(SYS_PMINTENSET_EL1),
{ PMU_SYS_REG(PMINTENSET_EL1),
.access = access_pminten, .reg = PMINTENSET_EL1 },
{ PMU_SYS_REG(SYS_PMINTENCLR_EL1),
{ PMU_SYS_REG(PMINTENCLR_EL1),
.access = access_pminten, .reg = PMINTENSET_EL1 },
{ SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi },
......@@ -2164,41 +2164,41 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_CTR_EL0), access_ctr },
{ SYS_DESC(SYS_SVCR), undef_access },
{ PMU_SYS_REG(SYS_PMCR_EL0), .access = access_pmcr,
{ PMU_SYS_REG(PMCR_EL0), .access = access_pmcr,
.reset = reset_pmcr, .reg = PMCR_EL0 },
{ PMU_SYS_REG(SYS_PMCNTENSET_EL0),
{ PMU_SYS_REG(PMCNTENSET_EL0),
.access = access_pmcnten, .reg = PMCNTENSET_EL0 },
{ PMU_SYS_REG(SYS_PMCNTENCLR_EL0),
{ PMU_SYS_REG(PMCNTENCLR_EL0),
.access = access_pmcnten, .reg = PMCNTENSET_EL0 },
{ PMU_SYS_REG(SYS_PMOVSCLR_EL0),
{ PMU_SYS_REG(PMOVSCLR_EL0),
.access = access_pmovs, .reg = PMOVSSET_EL0 },
/*
* PM_SWINC_EL0 is exposed to userspace as RAZ/WI, as it was
* previously (and pointlessly) advertised in the past...
*/
{ PMU_SYS_REG(SYS_PMSWINC_EL0),
{ PMU_SYS_REG(PMSWINC_EL0),
.get_user = get_raz_reg, .set_user = set_wi_reg,
.access = access_pmswinc, .reset = NULL },
{ PMU_SYS_REG(SYS_PMSELR_EL0),
{ PMU_SYS_REG(PMSELR_EL0),
.access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 },
{ PMU_SYS_REG(SYS_PMCEID0_EL0),
{ PMU_SYS_REG(PMCEID0_EL0),
.access = access_pmceid, .reset = NULL },
{ PMU_SYS_REG(SYS_PMCEID1_EL0),
{ PMU_SYS_REG(PMCEID1_EL0),
.access = access_pmceid, .reset = NULL },
{ PMU_SYS_REG(SYS_PMCCNTR_EL0),
{ PMU_SYS_REG(PMCCNTR_EL0),
.access = access_pmu_evcntr, .reset = reset_unknown,
.reg = PMCCNTR_EL0, .get_user = get_pmu_evcntr},
{ PMU_SYS_REG(SYS_PMXEVTYPER_EL0),
{ PMU_SYS_REG(PMXEVTYPER_EL0),
.access = access_pmu_evtyper, .reset = NULL },
{ PMU_SYS_REG(SYS_PMXEVCNTR_EL0),
{ PMU_SYS_REG(PMXEVCNTR_EL0),
.access = access_pmu_evcntr, .reset = NULL },
/*
* PMUSERENR_EL0 resets as unknown in 64bit mode while it resets as zero
* in 32bit mode. Here we choose to reset it as zero for consistency.
*/
{ PMU_SYS_REG(SYS_PMUSERENR_EL0), .access = access_pmuserenr,
{ PMU_SYS_REG(PMUSERENR_EL0), .access = access_pmuserenr,
.reset = reset_val, .reg = PMUSERENR_EL0, .val = 0 },
{ PMU_SYS_REG(SYS_PMOVSSET_EL0),
{ PMU_SYS_REG(PMOVSSET_EL0),
.access = access_pmovs, .reg = PMOVSSET_EL0 },
{ SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 },
......@@ -2354,7 +2354,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
* PMCCFILTR_EL0 resets as unknown in 64bit mode while it resets as zero
* in 32bit mode. Here we choose to reset it as zero for consistency.
*/
{ PMU_SYS_REG(SYS_PMCCFILTR_EL0), .access = access_pmu_evtyper,
{ PMU_SYS_REG(PMCCFILTR_EL0), .access = access_pmu_evtyper,
.reset = reset_val, .reg = PMCCFILTR_EL0, .val = 0 },
EL2_REG(VPIDR_EL2, access_rw, reset_unknown, 0),
......
......@@ -749,7 +749,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
WARN_ON(vgic_v4_put(vcpu, false));
WARN_ON(vgic_v4_put(vcpu));
vgic_v3_vmcr_sync(vcpu);
......
......@@ -336,14 +336,14 @@ void vgic_v4_teardown(struct kvm *kvm)
its_vm->vpes = NULL;
}
int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db)
int vgic_v4_put(struct kvm_vcpu *vcpu)
{
struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
return 0;
return its_make_vpe_non_resident(vpe, need_db);
return its_make_vpe_non_resident(vpe, !!vcpu_get_flag(vcpu, IN_WFI));
}
int vgic_v4_load(struct kvm_vcpu *vcpu)
......@@ -354,6 +354,9 @@ int vgic_v4_load(struct kvm_vcpu *vcpu)
if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
return 0;
if (vcpu_get_flag(vcpu, IN_WFI))
return 0;
/*
* Before making the VPE resident, make sure the redistributor
* corresponding to our current CPU expects us here. See the
......
......@@ -431,7 +431,7 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
int vgic_v4_load(struct kvm_vcpu *vcpu);
void vgic_v4_commit(struct kvm_vcpu *vcpu);
int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db);
int vgic_v4_put(struct kvm_vcpu *vcpu);
/* CPU HP callbacks */
void kvm_vgic_cpu_up(void);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment