Commit be08c3cf authored by Marc Zyngier's avatar Marc Zyngier

Merge branch kvm-arm64/pkvm/fixed-features into kvmarm-master/next

* kvm-arm64/pkvm/fixed-features: (22 commits)
  : .
  : Add the pKVM fixed feature that allows a bunch of exceptions
  : to either be forbidden or be easily handled at EL2.
  : .
  KVM: arm64: pkvm: Give priority to standard traps over pvm handling
  KVM: arm64: pkvm: Pass vpcu instead of kvm to kvm_get_exit_handler_array()
  KVM: arm64: pkvm: Move kvm_handle_pvm_restricted around
  KVM: arm64: pkvm: Consolidate include files
  KVM: arm64: pkvm: Preserve pending SError on exit from AArch32
  KVM: arm64: pkvm: Handle GICv3 traps as required
  KVM: arm64: pkvm: Drop sysregs that should never be routed to the host
  KVM: arm64: pkvm: Drop AArch32-specific registers
  KVM: arm64: pkvm: Make the ERR/ERX*_EL1 registers RAZ/WI
  KVM: arm64: pkvm: Use a single function to expose all id-regs
  KVM: arm64: Fix early exit ptrauth handling
  KVM: arm64: Handle protected guests at 32 bits
  KVM: arm64: Trap access to pVM restricted features
  KVM: arm64: Move sanitized copies of CPU features
  KVM: arm64: Initialize trap registers for protected VMs
  KVM: arm64: Add handlers for protected VM System Registers
  KVM: arm64: Simplify masking out MTE in feature id reg
  KVM: arm64: Add missing field descriptor for MDCR_EL2
  KVM: arm64: Pass struct kvm to per-EC handlers
  KVM: arm64: Move early handlers to per-EC handlers
  ...
Signed-off-by: default avatarMarc Zyngier <maz@kernel.org>
parents 5f8b2591 07305590
......@@ -295,6 +295,7 @@
#define MDCR_EL2_HPMFZO (UL(1) << 29)
#define MDCR_EL2_MTPME (UL(1) << 28)
#define MDCR_EL2_TDCC (UL(1) << 27)
#define MDCR_EL2_HLP (UL(1) << 26)
#define MDCR_EL2_HCCD (UL(1) << 23)
#define MDCR_EL2_TTRF (UL(1) << 19)
#define MDCR_EL2_HPMD (UL(1) << 17)
......
......@@ -74,6 +74,7 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr,
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs,
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps,
};
#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
......
......@@ -781,6 +781,8 @@ static inline bool kvm_vm_is_protected(struct kvm *kvm)
return false;
}
void kvm_init_protected_traps(struct kvm_vcpu *vcpu);
int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
......
......@@ -115,7 +115,12 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
#endif
extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64pfr1_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64isar0_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64isar1_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val);
#endif /* __ARM64_KVM_HYP_H__ */
......@@ -622,6 +622,14 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
ret = kvm_arm_pmu_v3_enable(vcpu);
/*
* Initialize traps for protected VMs.
* NOTE: Move to run in EL2 directly, rather than via a hypercall, once
* the code is in place for first run initialization at EL2.
*/
if (kvm_vm_is_protected(kvm))
kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu);
return ret;
}
......@@ -1819,8 +1827,13 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits)
void *addr = phys_to_virt(hyp_mem_base);
int ret;
kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1);
kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1);
ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP);
if (ret)
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2015 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
#ifndef __ARM64_KVM_HYP_FAULT_H__
#define __ARM64_KVM_HYP_FAULT_H__
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
u64 par, tmp;
/*
* Resolve the IPA the hard way using the guest VA.
*
* Stage-1 translation already validated the memory access
* rights. As such, we can use the EL1 translation regime, and
* don't have to distinguish between EL0 and EL1 access.
*
* We do need to save/restore PAR_EL1 though, as we haven't
* saved the guest context yet, and we may return early...
*/
par = read_sysreg_par();
if (!__kvm_at("s1e1r", far))
tmp = read_sysreg_par();
else
tmp = SYS_PAR_EL1_F; /* back to the guest */
write_sysreg(par, par_el1);
if (unlikely(tmp & SYS_PAR_EL1_F))
return false; /* Translation failed, back to guest */
/* Convert PAR to HPFAR format */
*hpfar = PAR_TO_HPFAR(tmp);
return true;
}
static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
{
u64 hpfar, far;
far = read_sysreg_el2(SYS_FAR);
/*
* The HPFAR can be invalid if the stage 2 fault did not
* happen during a stage 1 page table walk (the ESR_EL2.S1PTW
* bit is clear) and one of the two following cases are true:
* 1. The fault was due to a permission fault
* 2. The processor carries errata 834220
*
* Therefore, for all non S1PTW faults where we either have a
* permission fault or the errata workaround is enabled, we
* resolve the IPA using the AT instruction.
*/
if (!(esr & ESR_ELx_S1PTW) &&
(cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
(esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
if (!__translate_far_to_hpfar(far, &hpfar))
return false;
} else {
hpfar = read_sysreg(hpfar_el2);
}
fault->far_el2 = far;
fault->hpfar_el2 = hpfar;
return true;
}
#endif
......@@ -8,6 +8,7 @@
#define __ARM64_KVM_HYP_SWITCH_H__
#include <hyp/adjust_pc.h>
#include <hyp/fault.h>
#include <linux/arm-smccc.h>
#include <linux/kvm_host.h>
......@@ -133,78 +134,9 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
}
}
static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
u64 par, tmp;
/*
* Resolve the IPA the hard way using the guest VA.
*
* Stage-1 translation already validated the memory access
* rights. As such, we can use the EL1 translation regime, and
* don't have to distinguish between EL0 and EL1 access.
*
* We do need to save/restore PAR_EL1 though, as we haven't
* saved the guest context yet, and we may return early...
*/
par = read_sysreg_par();
if (!__kvm_at("s1e1r", far))
tmp = read_sysreg_par();
else
tmp = SYS_PAR_EL1_F; /* back to the guest */
write_sysreg(par, par_el1);
if (unlikely(tmp & SYS_PAR_EL1_F))
return false; /* Translation failed, back to guest */
/* Convert PAR to HPFAR format */
*hpfar = PAR_TO_HPFAR(tmp);
return true;
}
static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
{
u64 hpfar, far;
far = read_sysreg_el2(SYS_FAR);
/*
* The HPFAR can be invalid if the stage 2 fault did not
* happen during a stage 1 page table walk (the ESR_EL2.S1PTW
* bit is clear) and one of the two following cases are true:
* 1. The fault was due to a permission fault
* 2. The processor carries errata 834220
*
* Therefore, for all non S1PTW faults where we either have a
* permission fault or the errata workaround is enabled, we
* resolve the IPA using the AT instruction.
*/
if (!(esr & ESR_ELx_S1PTW) &&
(cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
(esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
if (!__translate_far_to_hpfar(far, &hpfar))
return false;
} else {
hpfar = read_sysreg(hpfar_el2);
}
fault->far_el2 = far;
fault->hpfar_el2 = hpfar;
return true;
}
static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
{
u8 ec;
u64 esr;
esr = vcpu->arch.fault.esr_el2;
ec = ESR_ELx_EC(esr);
if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
return true;
return __get_fault_info(esr, &vcpu->arch.fault);
return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
}
static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu)
......@@ -225,8 +157,13 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
}
/* Check for an FPSIMD/SVE trap and handle as appropriate */
static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
/*
* We trap the first access to the FP/SIMD to save the host context and
* restore the guest context lazily.
* If FP/SIMD is not implemented, handle the trap and inject an undefined
* instruction exception to the guest. Similarly for trapped SVE accesses.
*/
static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
bool sve_guest, sve_host;
u8 esr_ec;
......@@ -244,9 +181,6 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
}
esr_ec = kvm_vcpu_trap_get_class(vcpu);
if (esr_ec != ESR_ELx_EC_FP_ASIMD &&
esr_ec != ESR_ELx_EC_SVE)
return false;
/* Don't handle SVE traps for non-SVE vcpus here: */
if (!sve_guest && esr_ec != ESR_ELx_EC_FP_ASIMD)
......@@ -348,14 +282,6 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
static inline bool esr_is_ptrauth_trap(u32 esr)
{
u32 ec = ESR_ELx_EC(esr);
if (ec == ESR_ELx_EC_PAC)
return true;
if (ec != ESR_ELx_EC_SYS64)
return false;
switch (esr_sys64_to_sysreg(esr)) {
case SYS_APIAKEYLO_EL1:
case SYS_APIAKEYHI_EL1:
......@@ -384,13 +310,12 @@ static inline bool esr_is_ptrauth_trap(u32 esr)
DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code)
{
struct kvm_cpu_context *ctxt;
u64 val;
if (!vcpu_has_ptrauth(vcpu) ||
!esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
if (!vcpu_has_ptrauth(vcpu))
return false;
ctxt = this_cpu_ptr(&kvm_hyp_ctxt);
......@@ -409,6 +334,90 @@ static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
return true;
}
static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
handle_tx2_tvm(vcpu))
return true;
if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
__vgic_v3_perform_cpuif_access(vcpu) == 1)
return true;
if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
return kvm_hyp_handle_ptrauth(vcpu, exit_code);
return false;
}
static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
__vgic_v3_perform_cpuif_access(vcpu) == 1)
return true;
return false;
}
static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (!__populate_fault_info(vcpu))
return true;
return false;
}
static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (!__populate_fault_info(vcpu))
return true;
if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
bool valid;
valid = kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
kvm_vcpu_dabt_isvalid(vcpu) &&
!kvm_vcpu_abt_issea(vcpu) &&
!kvm_vcpu_abt_iss1tw(vcpu);
if (valid) {
int ret = __vgic_v2_perform_cpuif_access(vcpu);
if (ret == 1)
return true;
/* Promote an illegal access to an SError.*/
if (ret == -1)
*exit_code = ARM_EXCEPTION_EL1_SERROR;
}
}
return false;
}
typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);
/*
* Allow the hypervisor to handle the exit with an exit handler if it has one.
*
* Returns true if the hypervisor handled the exit, and control should go back
* to the guest, or false if it hasn't.
*/
static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
exit_handler_fn fn;
fn = handlers[kvm_vcpu_trap_get_class(vcpu)];
if (fn)
return fn(vcpu, exit_code);
return false;
}
/*
* Return true when we were able to fixup the guest exit and should return to
* the guest, false when we should restore the host state and return to the
......@@ -443,59 +452,9 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
if (*exit_code != ARM_EXCEPTION_TRAP)
goto exit;
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
handle_tx2_tvm(vcpu))
/* Check if there's an exit handler and allow it to handle the exit. */
if (kvm_hyp_handle_exit(vcpu, exit_code))
goto guest;
/*
* We trap the first access to the FP/SIMD to save the host context
* and restore the guest context lazily.
* If FP/SIMD is not implemented, handle the trap and inject an
* undefined instruction exception to the guest.
* Similarly for trapped SVE accesses.
*/
if (__hyp_handle_fpsimd(vcpu))
goto guest;
if (__hyp_handle_ptrauth(vcpu))
goto guest;
if (!__populate_fault_info(vcpu))
goto guest;
if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
bool valid;
valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
kvm_vcpu_dabt_isvalid(vcpu) &&
!kvm_vcpu_abt_issea(vcpu) &&
!kvm_vcpu_abt_iss1tw(vcpu);
if (valid) {
int ret = __vgic_v2_perform_cpuif_access(vcpu);
if (ret == 1)
goto guest;
/* Promote an illegal access to an SError.*/
if (ret == -1)
*exit_code = ARM_EXCEPTION_EL1_SERROR;
goto exit;
}
}
if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
(kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
int ret = __vgic_v3_perform_cpuif_access(vcpu);
if (ret == 1)
goto guest;
}
exit:
/* Return to the host kernel and handle the exit */
return false;
......
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2021 Google LLC
* Author: Fuad Tabba <tabba@google.com>
*/
#ifndef __ARM64_KVM_FIXED_CONFIG_H__
#define __ARM64_KVM_FIXED_CONFIG_H__
#include <asm/sysreg.h>
/*
* This file contains definitions for features to be allowed or restricted for
* guest virtual machines, depending on the mode KVM is running in and on the
* type of guest that is running.
*
* The ALLOW masks represent a bitmask of feature fields that are allowed
* without any restrictions as long as they are supported by the system.
*
* The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for
* features that are restricted to support at most the specified feature.
*
* If a feature field is not present in either, than it is not supported.
*
* The approach taken for protected VMs is to allow features that are:
* - Needed by common Linux distributions (e.g., floating point)
* - Trivial to support, e.g., supporting the feature does not introduce or
* require tracking of additional state in KVM
* - Cannot be trapped or prevent the guest from using anyway
*/
/*
* Allow for protected VMs:
* - Floating-point and Advanced SIMD
* - Data Independent Timing
*/
#define PVM_ID_AA64PFR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \
ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \
ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \
)
/*
* Restrict to the following *unsigned* features for protected VMs:
* - AArch64 guests only (no support for AArch32 guests):
* AArch32 adds complexity in trap handling, emulation, condition codes,
* etc...
* - RAS (v1)
* Supported by KVM
*/
#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \
)
/*
* Allow for protected VMs:
* - Branch Target Identification
* - Speculative Store Bypassing
*/
#define PVM_ID_AA64PFR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \
ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \
)
/*
* Allow for protected VMs:
* - Mixed-endian
* - Distinction between Secure and Non-secure Memory
* - Mixed-endian at EL0 only
* - Non-context synchronizing exception entry and exit
*/
#define PVM_ID_AA64MMFR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \
ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \
ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \
ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \
)
/*
* Restrict to the following *unsigned* features for protected VMs:
* - 40-bit IPA
* - 16-bit ASID
*/
#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \
)
/*
* Allow for protected VMs:
* - Hardware translation table updates to Access flag and Dirty state
* - Number of VMID bits from CPU
* - Hierarchical Permission Disables
* - Privileged Access Never
* - SError interrupt exceptions from speculative reads
* - Enhanced Translation Synchronization
*/
#define PVM_ID_AA64MMFR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \
)
/*
* Allow for protected VMs:
* - Common not Private translations
* - User Access Override
* - IESB bit in the SCTLR_ELx registers
* - Unaligned single-copy atomicity and atomic functions
* - ESR_ELx.EC value on an exception by read access to feature ID space
* - TTL field in address operations.
* - Break-before-make sequences when changing translation block size
* - E0PDx mechanism
*/
#define PVM_ID_AA64MMFR2_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \
)
/*
* No support for Scalable Vectors for protected VMs:
* Requires additional support from KVM, e.g., context-switching and
* trapping at EL2
*/
#define PVM_ID_AA64ZFR0_ALLOW (0ULL)
/*
* No support for debug, including breakpoints, and watchpoints for protected
* VMs:
* The Arm architecture mandates support for at least the Armv8 debug
* architecture, which would include at least 2 hardware breakpoints and
* watchpoints. Providing that support to protected guests adds
* considerable state and complexity. Therefore, the reserved value of 0 is
* used for debug-related fields.
*/
#define PVM_ID_AA64DFR0_ALLOW (0ULL)
#define PVM_ID_AA64DFR1_ALLOW (0ULL)
/*
* No support for implementation defined features.
*/
#define PVM_ID_AA64AFR0_ALLOW (0ULL)
#define PVM_ID_AA64AFR1_ALLOW (0ULL)
/*
* No restrictions on instructions implemented in AArch64.
*/
#define PVM_ID_AA64ISAR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \
)
#define PVM_ID_AA64ISAR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \
)
u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
int kvm_check_pvm_sysreg_table(void);
#endif /* __ARM64_KVM_FIXED_CONFIG_H__ */
......@@ -15,4 +15,6 @@
#define DECLARE_REG(type, name, ctxt, reg) \
type name = (type)cpu_reg(ctxt, (reg))
void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu);
#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */
......@@ -14,7 +14,7 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs))
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \
cache.o setup.o mm.o mem_protect.o
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
obj-y += $(lib-objs)
......
......@@ -4,7 +4,7 @@
* Author: Andrew Scull <ascull@google.com>
*/
#include <hyp/switch.h>
#include <hyp/adjust_pc.h>
#include <asm/pgtable-types.h>
#include <asm/kvm_asm.h>
......@@ -160,6 +160,14 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
{
cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize();
}
static void handle___pkvm_vcpu_init_traps(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
__pkvm_vcpu_init_traps(kern_hyp_va(vcpu));
}
typedef void (*hcall_t)(struct kvm_cpu_context *);
#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
......@@ -187,6 +195,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__vgic_v3_write_vmcr),
HANDLE_FUNC(__vgic_v3_save_aprs),
HANDLE_FUNC(__vgic_v3_restore_aprs),
HANDLE_FUNC(__pkvm_vcpu_init_traps),
};
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
......
......@@ -11,7 +11,7 @@
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>
#include <hyp/switch.h>
#include <hyp/fault.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
......@@ -25,12 +25,6 @@ struct host_kvm host_kvm;
static struct hyp_pool host_s2_pool;
/*
* Copies of the host's CPU features registers holding sanitized values.
*/
u64 id_aa64mmfr0_el1_sys_val;
u64 id_aa64mmfr1_el1_sys_val;
const u8 pkvm_hyp_id = 1;
static void *host_s2_zalloc_pages_exact(size_t size)
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2021 Google LLC
* Author: Fuad Tabba <tabba@google.com>
*/
#include <linux/kvm_host.h>
#include <linux/mm.h>
#include <nvhe/fixed_config.h>
#include <nvhe/trap_handler.h>
/*
* Set trap register values based on features in ID_AA64PFR0.
*/
static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
{
const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1);
u64 hcr_set = HCR_RW;
u64 hcr_clear = 0;
u64 cptr_set = 0;
/* Protected KVM does not support AArch32 guests. */
BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0),
PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1),
PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
/*
* Linux guests assume support for floating-point and Advanced SIMD. Do
* not change the trapping behavior for these from the KVM default.
*/
BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
PVM_ID_AA64PFR0_ALLOW));
BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
PVM_ID_AA64PFR0_ALLOW));
/* Trap RAS unless all current versions are supported */
if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), feature_ids) <
ID_AA64PFR0_RAS_V1P1) {
hcr_set |= HCR_TERR | HCR_TEA;
hcr_clear |= HCR_FIEN;
}
/* Trap AMU */
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_AMU), feature_ids)) {
hcr_clear |= HCR_AMVOFFEN;
cptr_set |= CPTR_EL2_TAM;
}
/* Trap SVE */
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_SVE), feature_ids))
cptr_set |= CPTR_EL2_TZ;
vcpu->arch.hcr_el2 |= hcr_set;
vcpu->arch.hcr_el2 &= ~hcr_clear;
vcpu->arch.cptr_el2 |= cptr_set;
}
/*
* Set trap register values based on features in ID_AA64PFR1.
*/
static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu)
{
const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR1_EL1);
u64 hcr_set = 0;
u64 hcr_clear = 0;
/* Memory Tagging: Trap and Treat as Untagged if not supported. */
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), feature_ids)) {
hcr_set |= HCR_TID5;
hcr_clear |= HCR_DCT | HCR_ATA;
}
vcpu->arch.hcr_el2 |= hcr_set;
vcpu->arch.hcr_el2 &= ~hcr_clear;
}
/*
* Set trap register values based on features in ID_AA64DFR0.
*/
static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
{
const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1);
u64 mdcr_set = 0;
u64 mdcr_clear = 0;
u64 cptr_set = 0;
/* Trap/constrain PMU */
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER), feature_ids)) {
mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME |
MDCR_EL2_HPMN_MASK;
}
/* Trap Debug */
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), feature_ids))
mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE;
/* Trap OS Double Lock */
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DOUBLELOCK), feature_ids))
mdcr_set |= MDCR_EL2_TDOSA;
/* Trap SPE */
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER), feature_ids)) {
mdcr_set |= MDCR_EL2_TPMS;
mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
}
/* Trap Trace Filter */
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACE_FILT), feature_ids))
mdcr_set |= MDCR_EL2_TTRF;
/* Trap Trace */
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACEVER), feature_ids))
cptr_set |= CPTR_EL2_TTA;
vcpu->arch.mdcr_el2 |= mdcr_set;
vcpu->arch.mdcr_el2 &= ~mdcr_clear;
vcpu->arch.cptr_el2 |= cptr_set;
}
/*
* Set trap register values based on features in ID_AA64MMFR0.
*/
static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu)
{
const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR0_EL1);
u64 mdcr_set = 0;
/* Trap Debug Communications Channel registers */
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_FGT), feature_ids))
mdcr_set |= MDCR_EL2_TDCC;
vcpu->arch.mdcr_el2 |= mdcr_set;
}
/*
* Set trap register values based on features in ID_AA64MMFR1.
*/
static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu)
{
const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR1_EL1);
u64 hcr_set = 0;
/* Trap LOR */
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_LOR), feature_ids))
hcr_set |= HCR_TLOR;
vcpu->arch.hcr_el2 |= hcr_set;
}
/*
* Set baseline trap register values.
*/
static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
{
const u64 hcr_trap_feat_regs = HCR_TID3;
const u64 hcr_trap_impdef = HCR_TACR | HCR_TIDCP | HCR_TID1;
/*
* Always trap:
* - Feature id registers: to control features exposed to guests
* - Implementation-defined features
*/
vcpu->arch.hcr_el2 |= hcr_trap_feat_regs | hcr_trap_impdef;
/* Clear res0 and set res1 bits to trap potential new features. */
vcpu->arch.hcr_el2 &= ~(HCR_RES0);
vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_RES0);
vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1;
vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0);
}
/*
* Initialize trap register values for protected VMs.
*/
void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
{
pvm_init_trap_regs(vcpu);
pvm_init_traps_aa64pfr0(vcpu);
pvm_init_traps_aa64pfr1(vcpu);
pvm_init_traps_aa64dfr0(vcpu);
pvm_init_traps_aa64mmfr0(vcpu);
pvm_init_traps_aa64mmfr1(vcpu);
}
......@@ -10,6 +10,7 @@
#include <asm/kvm_pgtable.h>
#include <nvhe/early_alloc.h>
#include <nvhe/fixed_config.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
......@@ -260,6 +261,8 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
void (*fn)(phys_addr_t params_pa, void *finalize_fn_va);
int ret;
BUG_ON(kvm_check_pvm_sysreg_table());
if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size))
return -EINVAL;
......
......@@ -27,6 +27,7 @@
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
/* Non-VHE specific context */
......@@ -158,6 +159,101 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
write_sysreg(pmu->events_host, pmcntenset_el0);
}
/**
* Handler for protected VM MSR, MRS or System instruction execution in AArch64.
*
* Returns true if the hypervisor has handled the exit, and control should go
* back to the guest, or false if it hasn't.
*/
static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
{
/*
* Make sure we handle the exit for workarounds and ptrauth
* before the pKVM handling, as the latter could decide to
* UNDEF.
*/
return (kvm_hyp_handle_sysreg(vcpu, exit_code) ||
kvm_handle_pvm_sysreg(vcpu, exit_code));
}
/**
* Handler for protected floating-point and Advanced SIMD accesses.
*
* Returns true if the hypervisor has handled the exit, and control should go
* back to the guest, or false if it hasn't.
*/
static bool kvm_handle_pvm_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
/* Linux guests assume support for floating-point and Advanced SIMD. */
BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
PVM_ID_AA64PFR0_ALLOW));
BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
PVM_ID_AA64PFR0_ALLOW));
return kvm_hyp_handle_fpsimd(vcpu, exit_code);
}
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
[ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg,
[ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
};
static const exit_handler_fn pvm_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_SYS64] = kvm_handle_pvm_sys64,
[ESR_ELx_EC_SVE] = kvm_handle_pvm_restricted,
[ESR_ELx_EC_FP_ASIMD] = kvm_handle_pvm_fpsimd,
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
};
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
{
if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm))))
return pvm_exit_handlers;
return hyp_exit_handlers;
}
/*
* Some guests (e.g., protected VMs) are not be allowed to run in AArch32.
* The ARMv8 architecture does not give the hypervisor a mechanism to prevent a
* guest from dropping to AArch32 EL0 if implemented by the CPU. If the
* hypervisor spots a guest in such a state ensure it is handled, and don't
* trust the host to spot or fix it. The check below is based on the one in
* kvm_arch_vcpu_ioctl_run().
*
* Returns false if the guest ran in AArch32 when it shouldn't have, and
* thus should exit to the host, or true if a the guest run loop can continue.
*/
static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code)
{
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) {
/*
* As we have caught the guest red-handed, decide that it isn't
* fit for purpose anymore by making the vcpu invalid. The VMM
* can try and fix it by re-initializing the vcpu with
* KVM_ARM_VCPU_INIT, however, this is likely not possible for
* protected VMs.
*/
vcpu->arch.target = -1;
*exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
*exit_code |= ARM_EXCEPTION_IL;
return false;
}
return true;
}
/* Switch to the guest for legacy non-VHE systems */
int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
{
......@@ -220,6 +316,9 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
/* Jump in the fire! */
exit_code = __guest_enter(vcpu);
if (unlikely(!handle_aarch32_guest(vcpu, &exit_code)))
break;
/* And we're baaack! */
} while (fixup_guest_exit(vcpu, &exit_code));
......
This diff is collapsed.
......@@ -96,6 +96,22 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
__deactivate_traps_common(vcpu);
}
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
[ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg,
[ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
};
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
{
return hyp_exit_handlers;
}
/* Switch to the guest for VHE systems running in EL2 */
static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
{
......
......@@ -1087,14 +1087,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
}
break;
case SYS_ID_AA64PFR1_EL1:
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
if (kvm_has_mte(vcpu->kvm)) {
u64 pfr, mte;
pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT);
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), mte);
}
if (!kvm_has_mte(vcpu->kvm))
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment