Commit 64dd3b6a authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus-non-x86' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm updates from Paolo Bonzini:
 "These are the non-x86 changes (mostly ARM, as is usually the case).
  The generic and x86 changes will come later"

  ARM:

   - New Stage-2 page table dumper, reusing the main ptdump
     infrastructure

   - FP8 support

   - Nested virtualization now supports the address translation
     (FEAT_ATS1A) family of instructions

   - Add selftest checks for a bunch of timer emulation corner cases

   - Fix multiple cases where KVM/arm64 doesn't correctly handle the
     guest trying to use a GICv3 that wasn't advertised

   - Remove REG_HIDDEN_USER from the sysreg infrastructure, making
     things little simpler

   - Prevent MTE tags being restored by userspace if we are actively
     logging writes, as that's a recipe for disaster

   - Correct the refcount on a page that is not considered for MTE tag
     copying (such as a device)

   - When walking a page table to split block mappings, synchronize only
     at the end the walk rather than on every store

   - Fix boundary check when transfering memory using FFA

   - Fix pKVM TLB invalidation, only affecting currently out of tree
     code but worth addressing for peace of mind

  LoongArch:

   - Revert qspinlock to test-and-set simple lock on VM.

   - Add Loongson Binary Translation extension support.

   - Add PMU support for guest.

   - Enable paravirt feature control from VMM.

   - Implement function kvm_para_has_feature().

  RISC-V:

   - Fix sbiret init before forwarding to userspace

   - Don't zero-out PMU snapshot area before freeing data

   - Allow legacy PMU access from guest

   - Fix to allow hpmcounter31 from the guest"

* tag 'for-linus-non-x86' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (64 commits)
  LoongArch: KVM: Implement function kvm_para_has_feature()
  LoongArch: KVM: Enable paravirt feature control from VMM
  LoongArch: KVM: Add PMU support for guest
  KVM: arm64: Get rid of REG_HIDDEN_USER visibility qualifier
  KVM: arm64: Simplify visibility handling of AArch32 SPSR_*
  KVM: arm64: Simplify handling of CNTKCTL_EL12
  LoongArch: KVM: Add vm migration support for LBT registers
  LoongArch: KVM: Add Binary Translation extension support
  LoongArch: KVM: Add VM feature detection function
  LoongArch: Revert qspinlock to test-and-set simple lock on VM
  KVM: arm64: Register ptdump with debugfs on guest creation
  arm64: ptdump: Don't override the level when operating on the stage-2 tables
  arm64: ptdump: Use the ptdump description from a local context
  arm64: ptdump: Expose the attribute parsing functionality
  KVM: arm64: Add memory length checks and remove inline in do_ffa_mem_xfer
  KVM: arm64: Move pagetable definitions to common header
  KVM: arm64: nv: Add support for FEAT_ATS1A
  KVM: arm64: nv: Plumb handling of AT S1* traps from EL2
  KVM: arm64: nv: Make AT+PAN instructions aware of FEAT_PAN3
  KVM: arm64: nv: Sanitise SCTLR_EL1.EPAN according to VM configuration
  ...
parents 980bcd35 0cdcc99e
......@@ -122,8 +122,8 @@
#define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n))
/* Status codes for individual page table levels */
#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + n)
#define ESR_ELx_FSC_PERM_L(n) (ESR_ELx_FSC_PERM + n)
#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + (n))
#define ESR_ELx_FSC_PERM_L(n) (ESR_ELx_FSC_PERM + (n))
#define ESR_ELx_FSC_FAULT_nL (0x2C)
#define ESR_ELx_FSC_FAULT_L(n) (((n) < 0 ? ESR_ELx_FSC_FAULT_nL : \
......@@ -161,6 +161,7 @@
/* ISS field definitions for exceptions taken in to Hyp */
#define ESR_ELx_FSC_ADDRSZ (0x00)
#define ESR_ELx_FSC_ADDRSZ_L(n) (ESR_ELx_FSC_ADDRSZ + (n))
#define ESR_ELx_CV (UL(1) << 24)
#define ESR_ELx_COND_SHIFT (20)
#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
......
......@@ -107,6 +107,7 @@
/* TCR_EL2 Registers bits */
#define TCR_EL2_DS (1UL << 32)
#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
#define TCR_EL2_HPD (1 << 24)
#define TCR_EL2_TBI (1 << 20)
#define TCR_EL2_PS_SHIFT 16
#define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT)
......
......@@ -236,6 +236,9 @@ extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding);
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
......
......@@ -448,6 +448,10 @@ enum vcpu_sysreg {
POR_EL0, /* Permission Overlay Register 0 (EL0) */
/* FP/SIMD/SVE */
SVCR,
FPMR,
/* 32bit specific registers. */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
......@@ -534,6 +538,8 @@ enum vcpu_sysreg {
VNCR(CNTP_CVAL_EL0),
VNCR(CNTP_CTL_EL0),
VNCR(ICH_HCR_EL2),
NR_SYS_REGS /* Nothing after this line! */
};
......@@ -599,6 +605,16 @@ struct kvm_host_data {
struct cpu_sve_state *sve_state;
};
union {
/* HYP VA pointer to the host storage for FPMR */
u64 *fpmr_ptr;
/*
* Used by pKVM only, as it needs to provide storage
* for the host
*/
u64 fpmr;
};
/* Ownership of the FP regs */
enum {
FP_STATE_FREE,
......@@ -668,8 +684,6 @@ struct kvm_vcpu_arch {
void *sve_state;
enum fp_type fp_type;
unsigned int sve_max_vl;
u64 svcr;
u64 fpmr;
/* Stage 2 paging state used by the hardware on next switch */
struct kvm_s2_mmu *hw_mmu;
......@@ -1477,4 +1491,8 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
(pa + pi + pa3) == 1; \
})
#define kvm_has_fpmr(k) \
(system_supports_fpmr() && \
kvm_has_feat((k), ID_AA64PFR2_EL1, FPMR, IMP))
#endif /* __ARM64_KVM_HOST_H__ */
......@@ -352,5 +352,11 @@ static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
return &kvm->arch.mmu != mmu;
}
#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
#else
static inline void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) {}
#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
#endif /* __ASSEMBLY__ */
#endif /* __ARM64_KVM_MMU_H__ */
......@@ -85,7 +85,7 @@ struct kvm_s2_trans {
bool readable;
int level;
u32 esr;
u64 upper_attr;
u64 desc;
};
static inline phys_addr_t kvm_s2_trans_output(struct kvm_s2_trans *trans)
......@@ -115,7 +115,7 @@ static inline bool kvm_s2_trans_writable(struct kvm_s2_trans *trans)
static inline bool kvm_s2_trans_executable(struct kvm_s2_trans *trans)
{
return !(trans->upper_attr & BIT(54));
return !(trans->desc & BIT(54));
}
extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
......@@ -205,4 +205,40 @@ static inline u64 kvm_encode_nested_level(struct kvm_s2_trans *trans)
return FIELD_PREP(KVM_NV_GUEST_MAP_SZ, trans->level);
}
/* Adjust alignment for the contiguous bit as per StageOA() */
#define contiguous_bit_shift(d, wi, l) \
({ \
u8 shift = 0; \
\
if ((d) & PTE_CONT) { \
switch (BIT((wi)->pgshift)) { \
case SZ_4K: \
shift = 4; \
break; \
case SZ_16K: \
shift = (l) == 2 ? 5 : 7; \
break; \
case SZ_64K: \
shift = 5; \
break; \
} \
} \
\
shift; \
})
static inline unsigned int ps_to_output_size(unsigned int ps)
{
switch (ps) {
case 0: return 32;
case 1: return 36;
case 2: return 40;
case 3: return 42;
case 4: return 44;
case 5:
default:
return 48;
}
}
#endif /* __ARM64_KVM_NESTED_H */
......@@ -59,6 +59,48 @@ typedef u64 kvm_pte_t;
#define KVM_PHYS_INVALID (-1ULL)
#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \
({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; })
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \
({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; })
#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3
#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10)
#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3
#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10)
#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50)
#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
KVM_PTE_LEAF_ATTR_HI_S2_XN)
#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
#define KVM_MAX_OWNER_ID 1
/*
* Used to indicate a pte for which a 'break-before-make' sequence is in
* progress.
*/
#define KVM_INVALID_PTE_LOCKED BIT(10)
static inline bool kvm_pte_valid(kvm_pte_t pte)
{
return pte & KVM_PTE_VALID;
......
......@@ -213,6 +213,11 @@
*/
#define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2)
/*
* Hierarchical permission for Stage-1 tables
*/
#define S1_TABLE_AP (_AT(pmdval_t, 3) << 61)
/*
* Highest possible physical address supported.
*/
......@@ -307,6 +312,10 @@
#define TCR_TBI1 (UL(1) << 38)
#define TCR_HA (UL(1) << 39)
#define TCR_HD (UL(1) << 40)
#define TCR_HPD0_SHIFT 41
#define TCR_HPD0 (UL(1) << TCR_HPD0_SHIFT)
#define TCR_HPD1_SHIFT 42
#define TCR_HPD1 (UL(1) << TCR_HPD1_SHIFT)
#define TCR_TBID0 (UL(1) << 51)
#define TCR_TBID1 (UL(1) << 52)
#define TCR_NFD0 (UL(1) << 53)
......
......@@ -5,6 +5,8 @@
#ifndef __ASM_PTDUMP_H
#define __ASM_PTDUMP_H
#include <linux/ptdump.h>
#ifdef CONFIG_PTDUMP_CORE
#include <linux/mm_types.h>
......@@ -21,14 +23,53 @@ struct ptdump_info {
unsigned long base_addr;
};
struct ptdump_prot_bits {
u64 mask;
u64 val;
const char *set;
const char *clear;
};
struct ptdump_pg_level {
const struct ptdump_prot_bits *bits;
char name[4];
int num;
u64 mask;
};
/*
* The page dumper groups page table entries of the same type into a single
* description. It uses pg_state to track the range information while
* iterating over the pte entries. When the continuity is broken it then
* dumps out a description of the range.
*/
struct ptdump_pg_state {
struct ptdump_state ptdump;
struct ptdump_pg_level *pg_level;
struct seq_file *seq;
const struct addr_marker *marker;
const struct mm_struct *mm;
unsigned long start_address;
int level;
u64 current_prot;
bool check_wx;
unsigned long wx_pages;
unsigned long uxn_pages;
};
void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
u64 val);
#ifdef CONFIG_PTDUMP_DEBUGFS
#define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64
void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
#else
static inline void ptdump_debugfs_register(struct ptdump_info *info,
const char *name) { }
#endif
#endif /* CONFIG_PTDUMP_DEBUGFS */
#else
static inline void note_page(struct ptdump_state *pt_st, unsigned long addr,
int level, u64 val) { }
#endif /* CONFIG_PTDUMP_CORE */
#endif /* __ASM_PTDUMP_H */
......@@ -109,6 +109,9 @@
#define set_pstate_ssbs(x) asm volatile(SET_PSTATE_SSBS(x))
#define set_pstate_dit(x) asm volatile(SET_PSTATE_DIT(x))
/* Register-based PAN access, for save/restore purposes */
#define SYS_PSTATE_PAN sys_reg(3, 0, 4, 2, 3)
#define __SYS_BARRIER_INSN(CRm, op2, Rt) \
__emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f))
......@@ -325,7 +328,25 @@
#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0)
#define SYS_PAR_EL1_F BIT(0)
/* When PAR_EL1.F == 1 */
#define SYS_PAR_EL1_FST GENMASK(6, 1)
#define SYS_PAR_EL1_PTW BIT(8)
#define SYS_PAR_EL1_S BIT(9)
#define SYS_PAR_EL1_AssuredOnly BIT(12)
#define SYS_PAR_EL1_TopLevel BIT(13)
#define SYS_PAR_EL1_Overlay BIT(14)
#define SYS_PAR_EL1_DirtyBit BIT(15)
#define SYS_PAR_EL1_F1_IMPDEF GENMASK_ULL(63, 48)
#define SYS_PAR_EL1_F1_RES0 (BIT(7) | BIT(10) | GENMASK_ULL(47, 16))
#define SYS_PAR_EL1_RES1 BIT(11)
/* When PAR_EL1.F == 0 */
#define SYS_PAR_EL1_SH GENMASK_ULL(8, 7)
#define SYS_PAR_EL1_NS BIT(9)
#define SYS_PAR_EL1_F0_IMPDEF BIT(10)
#define SYS_PAR_EL1_NSE BIT(11)
#define SYS_PAR_EL1_PA GENMASK_ULL(51, 12)
#define SYS_PAR_EL1_ATTR GENMASK_ULL(63, 56)
#define SYS_PAR_EL1_F0_RES0 (GENMASK_ULL(6, 1) | GENMASK_ULL(55, 52))
/*** Statistical Profiling Extension ***/
#define PMSEVFR_EL1_RES0_IMP \
......@@ -651,6 +672,7 @@
#define OP_AT_S12E1W sys_insn(AT_Op0, 4, AT_CRn, 8, 5)
#define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
#define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
#define OP_AT_S1E2A sys_insn(AT_Op0, 4, AT_CRn, 9, 2)
/* TLBI instructions */
#define TLBI_Op0 1
......
......@@ -66,4 +66,21 @@ config PROTECTED_NVHE_STACKTRACE
If unsure, or not using protected nVHE (pKVM), say N.
config PTDUMP_STAGE2_DEBUGFS
bool "Present the stage-2 pagetables to debugfs"
depends on KVM
depends on DEBUG_KERNEL
depends on DEBUG_FS
depends on GENERIC_PTDUMP
select PTDUMP_CORE
default n
help
Say Y here if you want to show the stage-2 kernel pagetables
layout in a debugfs file. This information is only useful for kernel developers
who are working in architecture specific areas of the kernel.
It is probably not a good idea to enable this feature in a production
kernel.
If in doubt, say N.
endif # VIRTUALIZATION
......@@ -17,7 +17,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o stacktrace.o \
vgic-sys-reg-v3.o fpsimd.o pkvm.o \
arch_timer.o trng.o vmid.o emulate-nested.o nested.o \
arch_timer.o trng.o vmid.o emulate-nested.o nested.o at.o \
vgic/vgic.o vgic/vgic-init.o \
vgic/vgic-irqfd.o vgic/vgic-v2.o \
vgic/vgic-v3.o vgic/vgic-v4.o \
......@@ -27,6 +27,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o
always-y := hyp_constants.h hyp-constants.s
......
......@@ -46,6 +46,8 @@
#include <kvm/arm_pmu.h>
#include <kvm/arm_psci.h>
#include "sys_regs.h"
static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
enum kvm_wfx_trap_policy {
......@@ -228,6 +230,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
void kvm_arch_create_vm_debugfs(struct kvm *kvm)
{
kvm_sys_regs_create_debugfs(kvm);
kvm_s2_ptdump_create_debugfs(kvm);
}
static void kvm_destroy_mpidr_data(struct kvm *kvm)
......@@ -821,15 +824,13 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
return ret;
}
if (vcpu_has_nv(vcpu)) {
ret = kvm_init_nv_sysregs(vcpu->kvm);
ret = kvm_finalize_sys_regs(vcpu);
if (ret)
return ret;
}
/*
* This needs to happen after NV has imposed its own restrictions on
* the feature set
* This needs to happen after any restriction has been applied
* to the feature set.
*/
kvm_calculate_traps(vcpu);
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2017 - Linaro Ltd
* Author: Jintack Lim <jintack.lim@linaro.org>
*/
#include <linux/kvm_host.h>
#include <asm/esr.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
enum trans_regime {
TR_EL10,
TR_EL20,
TR_EL2,
};
struct s1_walk_info {
u64 baddr;
enum trans_regime regime;
unsigned int max_oa_bits;
unsigned int pgshift;
unsigned int txsz;
int sl;
bool hpd;
bool be;
bool s2;
};
struct s1_walk_result {
union {
struct {
u64 desc;
u64 pa;
s8 level;
u8 APTable;
bool UXNTable;
bool PXNTable;
};
struct {
u8 fst;
bool ptw;
bool s2;
};
};
bool failed;
};
static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool ptw, bool s2)
{
wr->fst = fst;
wr->ptw = ptw;
wr->s2 = s2;
wr->failed = true;
}
#define S1_MMU_DISABLED (-127)
static int get_ia_size(struct s1_walk_info *wi)
{
return 64 - wi->txsz;
}
/* Return true if the IPA is out of the OA range */
static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
{
return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
}
/* Return the translation regime that applies to an AT instruction */
static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
{
/*
* We only get here from guest EL2, so the translation
* regime AT applies to is solely defined by {E2H,TGE}.
*/
switch (op) {
case OP_AT_S1E2R:
case OP_AT_S1E2W:
case OP_AT_S1E2A:
return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
break;
default:
return (vcpu_el2_e2h_is_set(vcpu) &&
vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
}
}
static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
struct s1_walk_result *wr, u64 va)
{
u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
unsigned int stride, x;
bool va55, tbi, lva, as_el0;
hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
wi->regime = compute_translation_regime(vcpu, op);
as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
va55 = va & BIT(55);
if (wi->regime == TR_EL2 && va55)
goto addrsz;
wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
switch (wi->regime) {
case TR_EL10:
sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
ttbr = (va55 ?
vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
vcpu_read_sys_reg(vcpu, TTBR0_EL1));
break;
case TR_EL2:
case TR_EL20:
sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
ttbr = (va55 ?
vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
vcpu_read_sys_reg(vcpu, TTBR0_EL2));
break;
default:
BUG();
}
tbi = (wi->regime == TR_EL2 ?
FIELD_GET(TCR_EL2_TBI, tcr) :
(va55 ?
FIELD_GET(TCR_TBI1, tcr) :
FIELD_GET(TCR_TBI0, tcr)));
if (!tbi && (u64)sign_extend64(va, 55) != va)
goto addrsz;
va = (u64)sign_extend64(va, 55);
/* Let's put the MMU disabled case aside immediately */
switch (wi->regime) {
case TR_EL10:
/*
* If dealing with the EL1&0 translation regime, 3 things
* can disable the S1 translation:
*
* - HCR_EL2.DC = 1
* - HCR_EL2.{E2H,TGE} = {0,1}
* - SCTLR_EL1.M = 0
*
* The TGE part is interesting. If we have decided that this
* is EL1&0, then it means that either {E2H,TGE} == {1,0} or
* {0,x}, and we only need to test for TGE == 1.
*/
if (hcr & (HCR_DC | HCR_TGE)) {
wr->level = S1_MMU_DISABLED;
break;
}
fallthrough;
case TR_EL2:
case TR_EL20:
if (!(sctlr & SCTLR_ELx_M))
wr->level = S1_MMU_DISABLED;
break;
}
if (wr->level == S1_MMU_DISABLED) {
if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
goto addrsz;
wr->pa = va;
return 0;
}
wi->be = sctlr & SCTLR_ELx_EE;
wi->hpd = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
wi->hpd &= (wi->regime == TR_EL2 ?
FIELD_GET(TCR_EL2_HPD, tcr) :
(va55 ?
FIELD_GET(TCR_HPD1, tcr) :
FIELD_GET(TCR_HPD0, tcr)));
/* Someone was silly enough to encode TG0/TG1 differently */
if (va55) {
wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
tg = FIELD_GET(TCR_TG1_MASK, tcr);
switch (tg << TCR_TG1_SHIFT) {
case TCR_TG1_4K:
wi->pgshift = 12; break;
case TCR_TG1_16K:
wi->pgshift = 14; break;
case TCR_TG1_64K:
default: /* IMPDEF: treat any other value as 64k */
wi->pgshift = 16; break;
}
} else {
wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
tg = FIELD_GET(TCR_TG0_MASK, tcr);
switch (tg << TCR_TG0_SHIFT) {
case TCR_TG0_4K:
wi->pgshift = 12; break;
case TCR_TG0_16K:
wi->pgshift = 14; break;
case TCR_TG0_64K:
default: /* IMPDEF: treat any other value as 64k */
wi->pgshift = 16; break;
}
}
/* R_PLCGL, R_YXNYW */
if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
if (wi->txsz > 39)
goto transfault_l0;
} else {
if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
goto transfault_l0;
}
/* R_GTJBY, R_SXWGM */
switch (BIT(wi->pgshift)) {
case SZ_4K:
lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
break;
case SZ_16K:
lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
break;
case SZ_64K:
lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
break;
}
if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
goto transfault_l0;
ia_bits = get_ia_size(wi);
/* R_YYVYV, I_THCZK */
if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
(va55 && va < GENMASK(63, ia_bits)))
goto transfault_l0;
/* I_ZFSYQ */
if (wi->regime != TR_EL2 &&
(tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
goto transfault_l0;
/* R_BNDVG and following statements */
if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
goto transfault_l0;
/* AArch64.S1StartLevel() */
stride = wi->pgshift - 3;
wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
ps = (wi->regime == TR_EL2 ?
FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));
wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));
/* Compute minimal alignment */
x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);
wi->baddr = ttbr & TTBRx_EL1_BADDR;
/* R_VPBBF */
if (check_output_size(wi->baddr, wi))
goto addrsz;
wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);
return 0;
addrsz: /* Address Size Fault level 0 */
fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false, false);
return -EFAULT;
transfault_l0: /* Translation Fault level 0 */
fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false, false);
return -EFAULT;
}
static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
struct s1_walk_result *wr, u64 va)
{
u64 va_top, va_bottom, baddr, desc;
int level, stride, ret;
level = wi->sl;
stride = wi->pgshift - 3;
baddr = wi->baddr;
va_top = get_ia_size(wi) - 1;
while (1) {
u64 index, ipa;
va_bottom = (3 - level) * stride + wi->pgshift;
index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);
ipa = baddr | index;
if (wi->s2) {
struct kvm_s2_trans s2_trans = {};
ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
if (ret) {
fail_s1_walk(wr,
(s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
true, true);
return ret;
}
if (!kvm_s2_trans_readable(&s2_trans)) {
fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
true, true);
return -EPERM;
}
ipa = kvm_s2_trans_output(&s2_trans);
}
ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
if (ret) {
fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level),
true, false);
return ret;
}
if (wi->be)
desc = be64_to_cpu((__force __be64)desc);
else
desc = le64_to_cpu((__force __le64)desc);
/* Invalid descriptor */
if (!(desc & BIT(0)))
goto transfault;
/* Block mapping, check validity down the line */
if (!(desc & BIT(1)))
break;
/* Page mapping */
if (level == 3)
break;
/* Table handling */
if (!wi->hpd) {
wr->APTable |= FIELD_GET(S1_TABLE_AP, desc);
wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
}
baddr = desc & GENMASK_ULL(47, wi->pgshift);
/* Check for out-of-range OA */
if (check_output_size(baddr, wi))
goto addrsz;
/* Prepare for next round */
va_top = va_bottom - 1;
level++;
}
/* Block mapping, check the validity of the level */
if (!(desc & BIT(1))) {
bool valid_block = false;
switch (BIT(wi->pgshift)) {
case SZ_4K:
valid_block = level == 1 || level == 2;
break;
case SZ_16K:
case SZ_64K:
valid_block = level == 2;
break;
}
if (!valid_block)
goto transfault;
}
if (check_output_size(desc & GENMASK(47, va_bottom), wi))
goto addrsz;
va_bottom += contiguous_bit_shift(desc, wi, level);
wr->failed = false;
wr->level = level;
wr->desc = desc;
wr->pa = desc & GENMASK(47, va_bottom);
wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);
return 0;
addrsz:
fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), true, false);
return -EINVAL;
transfault:
fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), true, false);
return -ENOENT;
}
struct mmu_config {
u64 ttbr0;
u64 ttbr1;
u64 tcr;
u64 mair;
u64 sctlr;
u64 vttbr;
u64 vtcr;
u64 hcr;
};
static void __mmu_config_save(struct mmu_config *config)
{
config->ttbr0 = read_sysreg_el1(SYS_TTBR0);
config->ttbr1 = read_sysreg_el1(SYS_TTBR1);
config->tcr = read_sysreg_el1(SYS_TCR);
config->mair = read_sysreg_el1(SYS_MAIR);
config->sctlr = read_sysreg_el1(SYS_SCTLR);
config->vttbr = read_sysreg(vttbr_el2);
config->vtcr = read_sysreg(vtcr_el2);
config->hcr = read_sysreg(hcr_el2);
}
static void __mmu_config_restore(struct mmu_config *config)
{
write_sysreg(config->hcr, hcr_el2);
/*
* ARM errata 1165522 and 1530923 require TGE to be 1 before
* we update the guest state.
*/
asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
write_sysreg_el1(config->ttbr0, SYS_TTBR0);
write_sysreg_el1(config->ttbr1, SYS_TTBR1);
write_sysreg_el1(config->tcr, SYS_TCR);
write_sysreg_el1(config->mair, SYS_MAIR);
write_sysreg_el1(config->sctlr, SYS_SCTLR);
write_sysreg(config->vttbr, vttbr_el2);
write_sysreg(config->vtcr, vtcr_el2);
}
static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
u64 host_pan;
bool fail;
host_pan = read_sysreg_s(SYS_PSTATE_PAN);
write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);
switch (op) {
case OP_AT_S1E1RP:
fail = __kvm_at(OP_AT_S1E1RP, vaddr);
break;
case OP_AT_S1E1WP:
fail = __kvm_at(OP_AT_S1E1WP, vaddr);
break;
}
write_sysreg_s(host_pan, SYS_PSTATE_PAN);
return fail;
}
#define MEMATTR(ic, oc) (MEMATTR_##oc << 4 | MEMATTR_##ic)
#define MEMATTR_NC 0b0100
#define MEMATTR_Wt 0b1000
#define MEMATTR_Wb 0b1100
#define MEMATTR_WbRaWa 0b1111
#define MEMATTR_IS_DEVICE(m) (((m) & GENMASK(7, 4)) == 0)
static u8 s2_memattr_to_attr(u8 memattr)
{
memattr &= 0b1111;
switch (memattr) {
case 0b0000:
case 0b0001:
case 0b0010:
case 0b0011:
return memattr << 2;
case 0b0100:
return MEMATTR(Wb, Wb);
case 0b0101:
return MEMATTR(NC, NC);
case 0b0110:
return MEMATTR(Wt, NC);
case 0b0111:
return MEMATTR(Wb, NC);
case 0b1000:
/* Reserved, assume NC */
return MEMATTR(NC, NC);
case 0b1001:
return MEMATTR(NC, Wt);
case 0b1010:
return MEMATTR(Wt, Wt);
case 0b1011:
return MEMATTR(Wb, Wt);
case 0b1100:
/* Reserved, assume NC */
return MEMATTR(NC, NC);
case 0b1101:
return MEMATTR(NC, Wb);
case 0b1110:
return MEMATTR(Wt, Wb);
case 0b1111:
return MEMATTR(Wb, Wb);
default:
unreachable();
}
}
static u8 combine_s1_s2_attr(u8 s1, u8 s2)
{
bool transient;
u8 final = 0;
/* Upgrade transient s1 to non-transient to simplify things */
switch (s1) {
case 0b0001 ... 0b0011: /* Normal, Write-Through Transient */
transient = true;
s1 = MEMATTR_Wt | (s1 & GENMASK(1,0));
break;
case 0b0101 ... 0b0111: /* Normal, Write-Back Transient */
transient = true;
s1 = MEMATTR_Wb | (s1 & GENMASK(1,0));
break;
default:
transient = false;
}
/* S2CombineS1AttrHints() */
if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
(s2 & GENMASK(3, 2)) == MEMATTR_NC)
final = MEMATTR_NC;
else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
(s2 & GENMASK(3, 2)) == MEMATTR_Wt)
final = MEMATTR_Wt;
else
final = MEMATTR_Wb;
if (final != MEMATTR_NC) {
/* Inherit RaWa hints form S1 */
if (transient) {
switch (s1 & GENMASK(3, 2)) {
case MEMATTR_Wt:
final = 0;
break;
case MEMATTR_Wb:
final = MEMATTR_NC;
break;
}
}
final |= s1 & GENMASK(1, 0);
}
return final;
}
#define ATTR_NSH 0b00
#define ATTR_RSV 0b01
#define ATTR_OSH 0b10
#define ATTR_ISH 0b11
static u8 compute_sh(u8 attr, u64 desc)
{
u8 sh;
/* Any form of device, as well as NC has SH[1:0]=0b10 */
if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
return ATTR_OSH;
sh = FIELD_GET(PTE_SHARED, desc);
if (sh == ATTR_RSV) /* Reserved, mapped to NSH */
sh = ATTR_NSH;
return sh;
}
static u8 combine_sh(u8 s1_sh, u8 s2_sh)
{
if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
return ATTR_OSH;
if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
return ATTR_ISH;
return ATTR_NSH;
}
static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
struct kvm_s2_trans *tr)
{
u8 s1_parattr, s2_memattr, final_attr;
u64 par;
/* If S2 has failed to translate, report the damage */
if (tr->esr) {
par = SYS_PAR_EL1_RES1;
par |= SYS_PAR_EL1_F;
par |= SYS_PAR_EL1_S;
par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
return par;
}
s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);
if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
s2_memattr &= ~BIT(3);
/* Combination of R_VRJSW and R_RHWZM */
switch (s2_memattr) {
case 0b0101:
if (MEMATTR_IS_DEVICE(s1_parattr))
final_attr = s1_parattr;
else
final_attr = MEMATTR(NC, NC);
break;
case 0b0110:
case 0b1110:
final_attr = MEMATTR(WbRaWa, WbRaWa);
break;
case 0b0111:
case 0b1111:
/* Preserve S1 attribute */
final_attr = s1_parattr;
break;
case 0b0100:
case 0b1100:
case 0b1101:
/* Reserved, do something non-silly */
final_attr = s1_parattr;
break;
default:
/* MemAttr[2]=0, Device from S2 */
final_attr = s2_memattr & GENMASK(1,0) << 2;
}
} else {
/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
u8 s2_parattr = s2_memattr_to_attr(s2_memattr);
if (MEMATTR_IS_DEVICE(s1_parattr) ||
MEMATTR_IS_DEVICE(s2_parattr)) {
final_attr = min(s1_parattr, s2_parattr);
} else {
/* At this stage, this is memory vs memory */
final_attr = combine_s1_s2_attr(s1_parattr & 0xf,
s2_parattr & 0xf);
final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
s2_parattr >> 4) << 4;
}
}
if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
!MEMATTR_IS_DEVICE(final_attr))
final_attr = MEMATTR(NC, NC);
par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
par |= tr->output & GENMASK(47, 12);
par |= FIELD_PREP(SYS_PAR_EL1_SH,
combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
compute_sh(final_attr, tr->desc)));
return par;
}
static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
enum trans_regime regime)
{
u64 par;
if (wr->failed) {
par = SYS_PAR_EL1_RES1;
par |= SYS_PAR_EL1_F;
par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
par |= wr->s2 ? SYS_PAR_EL1_S : 0;
} else if (wr->level == S1_MMU_DISABLED) {
/* MMU off or HCR_EL2.DC == 1 */
par = SYS_PAR_EL1_NSE;
par |= wr->pa & GENMASK_ULL(47, 12);
if (regime == TR_EL10 &&
(__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
MEMATTR(WbRaWa, WbRaWa));
par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
} else {
par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
}
} else {
u64 mair, sctlr;
u8 sh;
par = SYS_PAR_EL1_NSE;
mair = (regime == TR_EL10 ?
vcpu_read_sys_reg(vcpu, MAIR_EL1) :
vcpu_read_sys_reg(vcpu, MAIR_EL2));
mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
mair &= 0xff;
sctlr = (regime == TR_EL10 ?
vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
vcpu_read_sys_reg(vcpu, SCTLR_EL2));
/* Force NC for memory if SCTLR_ELx.C is clear */
if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
mair = MEMATTR(NC, NC);
par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
par |= wr->pa & GENMASK_ULL(47, 12);
sh = compute_sh(mair, wr->desc);
par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
}
return par;
}
static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
u64 sctlr;
if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
return false;
if (regime == TR_EL10)
sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
else
sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
return sctlr & SCTLR_EL1_EPAN;
}
static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
bool perm_fail, ur, uw, ux, pr, pw, px;
struct s1_walk_result wr = {};
struct s1_walk_info wi = {};
int ret, idx;
ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr);
if (ret)
goto compute_par;
if (wr.level == S1_MMU_DISABLED)
goto compute_par;
idx = srcu_read_lock(&vcpu->kvm->srcu);
ret = walk_s1(vcpu, &wi, &wr, vaddr);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
if (ret)
goto compute_par;
/* FIXME: revisit when adding indirect permission support */
/* AArch64.S1DirectBasePermissions() */
if (wi.regime != TR_EL2) {
switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr.desc)) {
case 0b00:
pr = pw = true;
ur = uw = false;
break;
case 0b01:
pr = pw = ur = uw = true;
break;
case 0b10:
pr = true;
pw = ur = uw = false;
break;
case 0b11:
pr = ur = true;
pw = uw = false;
break;
}
switch (wr.APTable) {
case 0b00:
break;
case 0b01:
ur = uw = false;
break;
case 0b10:
pw = uw = false;
break;
case 0b11:
pw = ur = uw = false;
break;
}
/* We don't use px for anything yet, but hey... */
px = !((wr.desc & PTE_PXN) || wr.PXNTable || uw);
ux = !((wr.desc & PTE_UXN) || wr.UXNTable);
if (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) {
bool pan;
pan = *vcpu_cpsr(vcpu) & PSR_PAN_BIT;
pan &= ur || uw || (pan3_enabled(vcpu, wi.regime) && ux);
pw &= !pan;
pr &= !pan;
}
} else {
ur = uw = ux = false;
if (!(wr.desc & PTE_RDONLY)) {
pr = pw = true;
} else {
pr = true;
pw = false;
}
if (wr.APTable & BIT(1))
pw = false;
/* XN maps to UXN */
px = !((wr.desc & PTE_UXN) || wr.UXNTable);
}
perm_fail = false;
switch (op) {
case OP_AT_S1E1RP:
case OP_AT_S1E1R:
case OP_AT_S1E2R:
perm_fail = !pr;
break;
case OP_AT_S1E1WP:
case OP_AT_S1E1W:
case OP_AT_S1E2W:
perm_fail = !pw;
break;
case OP_AT_S1E0R:
perm_fail = !ur;
break;
case OP_AT_S1E0W:
perm_fail = !uw;
break;
case OP_AT_S1E1A:
case OP_AT_S1E2A:
break;
default:
BUG();
}
if (perm_fail)
fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false, false);
compute_par:
return compute_par_s1(vcpu, &wr, wi.regime);
}
/*
* Return the PAR_EL1 value as the result of a valid translation.
*
* If the translation is unsuccessful, the value may only contain
* PAR_EL1.F, and cannot be taken at face value. It isn't an
* indication of the translation having failed, only that the fast
* path did not succeed, *unless* it indicates a S1 permission fault.
*/
static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
struct mmu_config config;
struct kvm_s2_mmu *mmu;
bool fail;
u64 par;
par = SYS_PAR_EL1_F;
/*
* We've trapped, so everything is live on the CPU. As we will
* be switching contexts behind everybody's back, disable
* interrupts while holding the mmu lock.
*/
guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);
/*
* If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
* the right one (as we trapped from vEL2). If not, save the
* full MMU context.
*/
if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
goto skip_mmu_switch;
/*
* Obtaining the S2 MMU for a L2 is horribly racy, and we may not
* find it (recycled by another vcpu, for example). When this
* happens, admit defeat immediately and use the SW (slow) path.
*/
mmu = lookup_s2_mmu(vcpu);
if (!mmu)
return par;
__mmu_config_save(&config);
write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1), SYS_TTBR0);
write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1);
write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR);
write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR);
write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR);
__load_stage2(mmu, mmu->arch);
skip_mmu_switch:
/* Clear TGE, enable S2 translation, we're rolling */
write_sysreg((config.hcr & ~HCR_TGE) | HCR_VM, hcr_el2);
isb();
switch (op) {
case OP_AT_S1E1RP:
case OP_AT_S1E1WP:
fail = at_s1e1p_fast(vcpu, op, vaddr);
break;
case OP_AT_S1E1R:
fail = __kvm_at(OP_AT_S1E1R, vaddr);
break;
case OP_AT_S1E1W:
fail = __kvm_at(OP_AT_S1E1W, vaddr);
break;
case OP_AT_S1E0R:
fail = __kvm_at(OP_AT_S1E0R, vaddr);
break;
case OP_AT_S1E0W:
fail = __kvm_at(OP_AT_S1E0W, vaddr);
break;
case OP_AT_S1E1A:
fail = __kvm_at(OP_AT_S1E1A, vaddr);
break;
default:
WARN_ON_ONCE(1);
fail = true;
break;
}
if (!fail)
par = read_sysreg_par();
if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
__mmu_config_restore(&config);
return par;
}
static bool par_check_s1_perm_fault(u64 par)
{
u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);
return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
!(par & SYS_PAR_EL1_S));
}
void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);
/*
* If PAR_EL1 reports that AT failed on a S1 permission fault, we
* know for sure that the PTW was able to walk the S1 tables and
* there's nothing else to do.
*
* If AT failed for any other reason, then we must walk the guest S1
* to emulate the instruction.
*/
if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
par = handle_at_slow(vcpu, op, vaddr);
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}
void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
u64 par;
/*
* We've trapped, so everything is live on the CPU. As we will be
* switching context behind everybody's back, disable interrupts...
*/
scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
struct kvm_s2_mmu *mmu;
u64 val, hcr;
bool fail;
mmu = &vcpu->kvm->arch.mmu;
val = hcr = read_sysreg(hcr_el2);
val &= ~HCR_TGE;
val |= HCR_VM;
if (!vcpu_el2_e2h_is_set(vcpu))
val |= HCR_NV | HCR_NV1;
write_sysreg(val, hcr_el2);
isb();
par = SYS_PAR_EL1_F;
switch (op) {
case OP_AT_S1E2R:
fail = __kvm_at(OP_AT_S1E1R, vaddr);
break;
case OP_AT_S1E2W:
fail = __kvm_at(OP_AT_S1E1W, vaddr);
break;
case OP_AT_S1E2A:
fail = __kvm_at(OP_AT_S1E1A, vaddr);
break;
default:
WARN_ON_ONCE(1);
fail = true;
}
isb();
if (!fail)
par = read_sysreg_par();
write_sysreg(hcr, hcr_el2);
isb();
}
/* We failed the translation, let's replay it in slow motion */
if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
par = handle_at_slow(vcpu, op, vaddr);
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}
void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
struct kvm_s2_trans out = {};
u64 ipa, par;
bool write;
int ret;
/* Do the stage-1 translation */
switch (op) {
case OP_AT_S12E1R:
op = OP_AT_S1E1R;
write = false;
break;
case OP_AT_S12E1W:
op = OP_AT_S1E1W;
write = true;
break;
case OP_AT_S12E0R:
op = OP_AT_S1E0R;
write = false;
break;
case OP_AT_S12E0W:
op = OP_AT_S1E0W;
write = true;
break;
default:
WARN_ON_ONCE(1);
return;
}
__kvm_at_s1e01(vcpu, op, vaddr);
par = vcpu_read_sys_reg(vcpu, PAR_EL1);
if (par & SYS_PAR_EL1_F)
return;
/*
* If we only have a single stage of translation (E2H=0 or
* TGE=1), exit early. Same thing if {VM,DC}=={0,0}.
*/
if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) ||
!(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
return;
/* Do the stage-2 translation */
ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
out.esr = 0;
ret = kvm_walk_nested_s2(vcpu, ipa, &out);
if (ret < 0)
return;
/* Check the access permission */
if (!out.esr &&
((!write && !out.readable) || (write && !out.writable)))
out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);
par = compute_par_s12(vcpu, par, &out);
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}
......@@ -83,14 +83,20 @@ enum cgt_group_id {
CGT_CPTR_TAM,
CGT_CPTR_TCPAC,
CGT_HCRX_EnFPM,
CGT_HCRX_TCR2En,
CGT_ICH_HCR_TC,
CGT_ICH_HCR_TALL0,
CGT_ICH_HCR_TALL1,
CGT_ICH_HCR_TDIR,
/*
* Anything after this point is a combination of coarse trap
* controls, which must all be evaluated to decide what to do.
*/
__MULTIPLE_CONTROL_BITS__,
CGT_HCR_IMO_FMO = __MULTIPLE_CONTROL_BITS__,
CGT_HCR_IMO_FMO_ICH_HCR_TC = __MULTIPLE_CONTROL_BITS__,
CGT_HCR_TID2_TID4,
CGT_HCR_TTLB_TTLBIS,
CGT_HCR_TTLB_TTLBOS,
......@@ -105,6 +111,8 @@ enum cgt_group_id {
CGT_MDCR_TDE_TDRA,
CGT_MDCR_TDCC_TDE_TDA,
CGT_ICH_HCR_TC_TDIR,
/*
* Anything after this point requires a callback evaluating a
* complex trap condition. Ugly stuff.
......@@ -372,12 +380,42 @@ static const struct trap_bits coarse_trap_bits[] = {
.mask = CPTR_EL2_TCPAC,
.behaviour = BEHAVE_FORWARD_ANY,
},
[CGT_HCRX_EnFPM] = {
.index = HCRX_EL2,
.value = 0,
.mask = HCRX_EL2_EnFPM,
.behaviour = BEHAVE_FORWARD_ANY,
},
[CGT_HCRX_TCR2En] = {
.index = HCRX_EL2,
.value = 0,
.mask = HCRX_EL2_TCR2En,
.behaviour = BEHAVE_FORWARD_ANY,
},
[CGT_ICH_HCR_TC] = {
.index = ICH_HCR_EL2,
.value = ICH_HCR_TC,
.mask = ICH_HCR_TC,
.behaviour = BEHAVE_FORWARD_ANY,
},
[CGT_ICH_HCR_TALL0] = {
.index = ICH_HCR_EL2,
.value = ICH_HCR_TALL0,
.mask = ICH_HCR_TALL0,
.behaviour = BEHAVE_FORWARD_ANY,
},
[CGT_ICH_HCR_TALL1] = {
.index = ICH_HCR_EL2,
.value = ICH_HCR_TALL1,
.mask = ICH_HCR_TALL1,
.behaviour = BEHAVE_FORWARD_ANY,
},
[CGT_ICH_HCR_TDIR] = {
.index = ICH_HCR_EL2,
.value = ICH_HCR_TDIR,
.mask = ICH_HCR_TDIR,
.behaviour = BEHAVE_FORWARD_ANY,
},
};
#define MCB(id, ...) \
......@@ -387,7 +425,6 @@ static const struct trap_bits coarse_trap_bits[] = {
}
static const enum cgt_group_id *coarse_control_combo[] = {
MCB(CGT_HCR_IMO_FMO, CGT_HCR_IMO, CGT_HCR_FMO),
MCB(CGT_HCR_TID2_TID4, CGT_HCR_TID2, CGT_HCR_TID4),
MCB(CGT_HCR_TTLB_TTLBIS, CGT_HCR_TTLB, CGT_HCR_TTLBIS),
MCB(CGT_HCR_TTLB_TTLBOS, CGT_HCR_TTLB, CGT_HCR_TTLBOS),
......@@ -402,6 +439,9 @@ static const enum cgt_group_id *coarse_control_combo[] = {
MCB(CGT_MDCR_TDE_TDOSA, CGT_MDCR_TDE, CGT_MDCR_TDOSA),
MCB(CGT_MDCR_TDE_TDRA, CGT_MDCR_TDE, CGT_MDCR_TDRA),
MCB(CGT_MDCR_TDCC_TDE_TDA, CGT_MDCR_TDCC, CGT_MDCR_TDE, CGT_MDCR_TDA),
MCB(CGT_HCR_IMO_FMO_ICH_HCR_TC, CGT_HCR_IMO, CGT_HCR_FMO, CGT_ICH_HCR_TC),
MCB(CGT_ICH_HCR_TC_TDIR, CGT_ICH_HCR_TC, CGT_ICH_HCR_TDIR),
};
typedef enum trap_behaviour (*complex_condition_check)(struct kvm_vcpu *);
......@@ -536,9 +576,9 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(SYS_CSSELR_EL1, CGT_HCR_TID2_TID4),
SR_RANGE_TRAP(SYS_ID_PFR0_EL1,
sys_reg(3, 0, 0, 7, 7), CGT_HCR_TID3),
SR_TRAP(SYS_ICC_SGI0R_EL1, CGT_HCR_IMO_FMO),
SR_TRAP(SYS_ICC_ASGI1R_EL1, CGT_HCR_IMO_FMO),
SR_TRAP(SYS_ICC_SGI1R_EL1, CGT_HCR_IMO_FMO),
SR_TRAP(SYS_ICC_SGI0R_EL1, CGT_HCR_IMO_FMO_ICH_HCR_TC),
SR_TRAP(SYS_ICC_ASGI1R_EL1, CGT_HCR_IMO_FMO_ICH_HCR_TC),
SR_TRAP(SYS_ICC_SGI1R_EL1, CGT_HCR_IMO_FMO_ICH_HCR_TC),
SR_RANGE_TRAP(sys_reg(3, 0, 11, 0, 0),
sys_reg(3, 0, 11, 15, 7), CGT_HCR_TIDCP),
SR_RANGE_TRAP(sys_reg(3, 1, 11, 0, 0),
......@@ -786,6 +826,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(OP_AT_S12E1W, CGT_HCR_NV),
SR_TRAP(OP_AT_S12E0R, CGT_HCR_NV),
SR_TRAP(OP_AT_S12E0W, CGT_HCR_NV),
SR_TRAP(OP_AT_S1E2A, CGT_HCR_NV),
SR_TRAP(OP_TLBI_IPAS2E1, CGT_HCR_NV),
SR_TRAP(OP_TLBI_RIPAS2E1, CGT_HCR_NV),
SR_TRAP(OP_TLBI_IPAS2LE1, CGT_HCR_NV),
......@@ -867,6 +908,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(OP_AT_S1E0W, CGT_HCR_AT),
SR_TRAP(OP_AT_S1E1RP, CGT_HCR_AT),
SR_TRAP(OP_AT_S1E1WP, CGT_HCR_AT),
SR_TRAP(OP_AT_S1E1A, CGT_HCR_AT),
SR_TRAP(SYS_ERXPFGF_EL1, CGT_HCR_nFIEN),
SR_TRAP(SYS_ERXPFGCTL_EL1, CGT_HCR_nFIEN),
SR_TRAP(SYS_ERXPFGCDN_EL1, CGT_HCR_nFIEN),
......@@ -1108,6 +1150,35 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(SYS_CNTP_CTL_EL0, CGT_CNTHCTL_EL1PTEN),
SR_TRAP(SYS_CNTPCT_EL0, CGT_CNTHCTL_EL1PCTEN),
SR_TRAP(SYS_CNTPCTSS_EL0, CGT_CNTHCTL_EL1PCTEN),
SR_TRAP(SYS_FPMR, CGT_HCRX_EnFPM),
/*
* IMPDEF choice:
* We treat ICC_SRE_EL2.{SRE,Enable) and ICV_SRE_EL1.SRE as
* RAO/WI. We therefore never consider ICC_SRE_EL2.Enable for
* ICC_SRE_EL1 access, and always handle it locally.
*/
SR_TRAP(SYS_ICC_AP0R0_EL1, CGT_ICH_HCR_TALL0),
SR_TRAP(SYS_ICC_AP0R1_EL1, CGT_ICH_HCR_TALL0),
SR_TRAP(SYS_ICC_AP0R2_EL1, CGT_ICH_HCR_TALL0),
SR_TRAP(SYS_ICC_AP0R3_EL1, CGT_ICH_HCR_TALL0),
SR_TRAP(SYS_ICC_AP1R0_EL1, CGT_ICH_HCR_TALL1),
SR_TRAP(SYS_ICC_AP1R1_EL1, CGT_ICH_HCR_TALL1),
SR_TRAP(SYS_ICC_AP1R2_EL1, CGT_ICH_HCR_TALL1),
SR_TRAP(SYS_ICC_AP1R3_EL1, CGT_ICH_HCR_TALL1),
SR_TRAP(SYS_ICC_BPR0_EL1, CGT_ICH_HCR_TALL0),
SR_TRAP(SYS_ICC_BPR1_EL1, CGT_ICH_HCR_TALL1),
SR_TRAP(SYS_ICC_CTLR_EL1, CGT_ICH_HCR_TC),
SR_TRAP(SYS_ICC_DIR_EL1, CGT_ICH_HCR_TC_TDIR),
SR_TRAP(SYS_ICC_EOIR0_EL1, CGT_ICH_HCR_TALL0),
SR_TRAP(SYS_ICC_EOIR1_EL1, CGT_ICH_HCR_TALL1),
SR_TRAP(SYS_ICC_HPPIR0_EL1, CGT_ICH_HCR_TALL0),
SR_TRAP(SYS_ICC_HPPIR1_EL1, CGT_ICH_HCR_TALL1),
SR_TRAP(SYS_ICC_IAR0_EL1, CGT_ICH_HCR_TALL0),
SR_TRAP(SYS_ICC_IAR1_EL1, CGT_ICH_HCR_TALL1),
SR_TRAP(SYS_ICC_IGRPEN0_EL1, CGT_ICH_HCR_TALL0),
SR_TRAP(SYS_ICC_IGRPEN1_EL1, CGT_ICH_HCR_TALL1),
SR_TRAP(SYS_ICC_PMR_EL1, CGT_ICH_HCR_TC),
SR_TRAP(SYS_ICC_RPR_EL1, CGT_ICH_HCR_TC),
};
static DEFINE_XARRAY(sr_forward_xa);
......
......@@ -63,6 +63,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
*/
*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
*host_data_ptr(fpsimd_state) = kern_hyp_va(&current->thread.uw.fpsimd_state);
*host_data_ptr(fpmr_ptr) = kern_hyp_va(&current->thread.uw.fpmr);
vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
......@@ -134,8 +135,8 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
fp_state.sve_state = vcpu->arch.sve_state;
fp_state.sve_vl = vcpu->arch.sve_max_vl;
fp_state.sme_state = NULL;
fp_state.svcr = &vcpu->arch.svcr;
fp_state.fpmr = &vcpu->arch.fpmr;
fp_state.svcr = &__vcpu_sys_reg(vcpu, SVCR);
fp_state.fpmr = &__vcpu_sys_reg(vcpu, FPMR);
fp_state.fp_type = &vcpu->arch.fp_type;
if (vcpu_has_sve(vcpu))
......
......@@ -1045,6 +1045,11 @@ int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
mutex_lock(&kvm->slots_lock);
if (write && atomic_read(&kvm->nr_memslots_dirty_logging)) {
ret = -EBUSY;
goto out;
}
while (length > 0) {
kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL);
void *maddr;
......@@ -1059,6 +1064,7 @@ int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
page = pfn_to_online_page(pfn);
if (!page) {
/* Reject ZONE_DEVICE memory */
kvm_release_pfn_clean(pfn);
ret = -EFAULT;
goto out;
}
......
......@@ -403,6 +403,9 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
else
__fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs);
if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm)))
write_sysreg_s(__vcpu_sys_reg(vcpu, FPMR), SYS_FPMR);
/* Skip restoring fpexc32 for AArch64 guests */
if (!(read_sysreg(hcr_el2) & HCR_RW))
write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);
......
......@@ -426,7 +426,7 @@ static void do_ffa_mem_frag_tx(struct arm_smccc_res *res,
return;
}
static __always_inline void do_ffa_mem_xfer(const u64 func_id,
static void __do_ffa_mem_xfer(const u64 func_id,
struct arm_smccc_res *res,
struct kvm_cpu_context *ctxt)
{
......@@ -440,9 +440,6 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id,
u32 offset, nr_ranges;
int ret = 0;
BUILD_BUG_ON(func_id != FFA_FN64_MEM_SHARE &&
func_id != FFA_FN64_MEM_LEND);
if (addr_mbz || npages_mbz || fraglen > len ||
fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) {
ret = FFA_RET_INVALID_PARAMETERS;
......@@ -461,6 +458,11 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id,
goto out_unlock;
}
if (len > ffa_desc_buf.len) {
ret = FFA_RET_NO_MEMORY;
goto out_unlock;
}
buf = hyp_buffers.tx;
memcpy(buf, host_buffers.tx, fraglen);
......@@ -512,6 +514,13 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id,
goto out_unlock;
}
#define do_ffa_mem_xfer(fid, res, ctxt) \
do { \
BUILD_BUG_ON((fid) != FFA_FN64_MEM_SHARE && \
(fid) != FFA_FN64_MEM_LEND); \
__do_ffa_mem_xfer((fid), (res), (ctxt)); \
} while (0);
static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
struct kvm_cpu_context *ctxt)
{
......
......@@ -130,7 +130,7 @@ alternative_else_nop_endif
/* Invalidate the stale TLBs from Bootloader */
tlbi alle2
tlbi vmalls12e1
tlbi alle1
dsb sy
mov_q x0, INIT_SCTLR_EL2_MMU_ON
......
......@@ -62,6 +62,8 @@ static void fpsimd_sve_flush(void)
static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
{
bool has_fpmr;
if (!guest_owns_fp_regs())
return;
......@@ -73,11 +75,18 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
else
__fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
has_fpmr = kvm_has_fpmr(kern_hyp_va(vcpu->kvm));
if (has_fpmr)
__vcpu_sys_reg(vcpu, FPMR) = read_sysreg_s(SYS_FPMR);
if (system_supports_sve())
__hyp_sve_restore_host();
else
__fpsimd_restore_state(*host_data_ptr(fpsimd_state));
if (has_fpmr)
write_sysreg_s(*host_data_ptr(fpmr), SYS_FPMR);
*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
}
......
......@@ -197,6 +197,15 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
} else {
__fpsimd_save_state(*host_data_ptr(fpsimd_state));
}
if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm))) {
u64 val = read_sysreg_s(SYS_FPMR);
if (unlikely(is_protected_kvm_enabled()))
*host_data_ptr(fpmr) = val;
else
**host_data_ptr(fpmr_ptr) = val;
}
}
static const exit_handler_fn hyp_exit_handlers[] = {
......
......@@ -132,10 +132,10 @@ static void exit_vmid_context(struct tlb_inv_context *cxt)
else
__load_host_stage2();
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
/* Ensure write of the old VMID */
isb();
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
if (!(cxt->sctlr & SCTLR_ELx_M)) {
write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
isb();
......
......@@ -17,48 +17,6 @@
#define KVM_PTE_TYPE_PAGE 1
#define KVM_PTE_TYPE_TABLE 1
#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \
({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; })
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \
({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; })
#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3
#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10)
#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3
#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10)
#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50)
#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
KVM_PTE_LEAF_ATTR_HI_S2_XN)
#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
#define KVM_MAX_OWNER_ID 1
/*
* Used to indicate a pte for which a 'break-before-make' sequence is in
* progress.
*/
#define KVM_INVALID_PTE_LOCKED BIT(10)
struct kvm_pgtable_walk_data {
struct kvm_pgtable_walker *walker;
......@@ -1547,7 +1505,6 @@ static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
*/
new = kvm_init_table_pte(childp, mm_ops);
stage2_make_pte(ctx, new);
dsb(ishst);
return 0;
}
......@@ -1559,8 +1516,11 @@ int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
.flags = KVM_PGTABLE_WALK_LEAF,
.arg = mc,
};
int ret;
return kvm_pgtable_walk(pgt, addr, size, &walker);
ret = kvm_pgtable_walk(pgt, addr, size, &walker);
dsb(ishst);
return ret;
}
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
......
......@@ -268,8 +268,16 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
* starting to mess with the rest of the GIC, and VMCR_EL2 in
* particular. This logic must be called before
* __vgic_v3_restore_state().
*
* However, if the vgic is disabled (ICH_HCR_EL2.EN==0), no GIC is
* provisioned at all. In order to prevent illegal accesses to the
* system registers to trap to EL1 (duh), force ICC_SRE_EL1.SRE to 1
* so that the trap bits can take effect. Yes, we *loves* the GIC.
*/
if (!cpu_if->vgic_sre) {
if (!(cpu_if->vgic_hcr & ICH_HCR_EN)) {
write_gicreg(ICC_SRE_EL1_SRE, ICC_SRE_EL1);
isb();
} else if (!cpu_if->vgic_sre) {
write_gicreg(0, ICC_SRE_EL1);
isb();
write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
......@@ -288,8 +296,9 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
}
/*
* Prevent the guest from touching the GIC system registers if
* SRE isn't enabled for GICv3 emulation.
* Prevent the guest from touching the ICC_SRE_EL1 system
* register. Note that this may not have any effect, as
* ICC_SRE_EL2.Enable being RAO/WI is a valid implementation.
*/
write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
ICC_SRE_EL2);
......@@ -297,10 +306,11 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
/*
* If we need to trap system registers, we must write
* ICH_HCR_EL2 anyway, even if no interrupts are being
* injected,
* injected. Note that this also applies if we don't expect
* any system register access (no vgic at all).
*/
if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
cpu_if->its_vpe.its_vm)
cpu_if->its_vpe.its_vm || !cpu_if->vgic_sre)
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
}
......@@ -326,7 +336,7 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
* no interrupts were being injected, and we disable it again here.
*/
if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
cpu_if->its_vpe.its_vm)
cpu_if->its_vpe.its_vm || !cpu_if->vgic_sre)
write_gicreg(0, ICH_HCR_EL2);
}
......@@ -1032,6 +1042,75 @@ static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
write_gicreg(vmcr, ICH_VMCR_EL2);
}
static bool __vgic_v3_check_trap_forwarding(struct kvm_vcpu *vcpu,
u32 sysreg, bool is_read)
{
u64 ich_hcr;
if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
return false;
ich_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
switch (sysreg) {
case SYS_ICC_IGRPEN0_EL1:
if (is_read &&
(__vcpu_sys_reg(vcpu, HFGRTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
return true;
if (!is_read &&
(__vcpu_sys_reg(vcpu, HFGWTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
return true;
fallthrough;
case SYS_ICC_AP0Rn_EL1(0):
case SYS_ICC_AP0Rn_EL1(1):
case SYS_ICC_AP0Rn_EL1(2):
case SYS_ICC_AP0Rn_EL1(3):
case SYS_ICC_BPR0_EL1:
case SYS_ICC_EOIR0_EL1:
case SYS_ICC_HPPIR0_EL1:
case SYS_ICC_IAR0_EL1:
return ich_hcr & ICH_HCR_TALL0;
case SYS_ICC_IGRPEN1_EL1:
if (is_read &&
(__vcpu_sys_reg(vcpu, HFGRTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
return true;
if (!is_read &&
(__vcpu_sys_reg(vcpu, HFGWTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
return true;
fallthrough;
case SYS_ICC_AP1Rn_EL1(0):
case SYS_ICC_AP1Rn_EL1(1):
case SYS_ICC_AP1Rn_EL1(2):
case SYS_ICC_AP1Rn_EL1(3):
case SYS_ICC_BPR1_EL1:
case SYS_ICC_EOIR1_EL1:
case SYS_ICC_HPPIR1_EL1:
case SYS_ICC_IAR1_EL1:
return ich_hcr & ICH_HCR_TALL1;
case SYS_ICC_DIR_EL1:
if (ich_hcr & ICH_HCR_TDIR)
return true;
fallthrough;
case SYS_ICC_RPR_EL1:
case SYS_ICC_CTLR_EL1:
case SYS_ICC_PMR_EL1:
return ich_hcr & ICH_HCR_TC;
default:
return false;
}
}
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
{
int rt;
......@@ -1041,6 +1120,9 @@ int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
bool is_read;
u32 sysreg;
if (kern_hyp_va(vcpu->kvm)->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3)
return 0;
esr = kvm_vcpu_get_esr(vcpu);
if (vcpu_mode_is_32bit(vcpu)) {
if (!kvm_condition_valid(vcpu)) {
......@@ -1055,6 +1137,9 @@ int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ;
if (__vgic_v3_check_trap_forwarding(vcpu, sysreg, is_read))
return 0;
switch (sysreg) {
case SYS_ICC_IAR0_EL1:
case SYS_ICC_IAR1_EL1:
......
......@@ -312,6 +312,9 @@ static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
{
__fpsimd_save_state(*host_data_ptr(fpsimd_state));
if (kvm_has_fpmr(vcpu->kvm))
**host_data_ptr(fpmr_ptr) = read_sysreg_s(SYS_FPMR);
}
static bool kvm_hyp_handle_tlbi_el2(struct kvm_vcpu *vcpu, u64 *exit_code)
......
......@@ -103,20 +103,6 @@ struct s2_walk_info {
bool be;
};
static unsigned int ps_to_output_size(unsigned int ps)
{
switch (ps) {
case 0: return 32;
case 1: return 36;
case 2: return 40;
case 3: return 42;
case 4: return 44;
case 5:
default:
return 48;
}
}
static u32 compute_fsc(int level, u32 fsc)
{
return fsc | (level & 0x3);
......@@ -256,7 +242,7 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
/* Check for valid descriptor at this point */
if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
out->upper_attr = desc;
out->desc = desc;
return 1;
}
......@@ -266,7 +252,7 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
if (check_output_size(wi, desc)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
out->upper_attr = desc;
out->desc = desc;
return 1;
}
......@@ -278,27 +264,24 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
if (level < first_block_level) {
out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
out->upper_attr = desc;
out->desc = desc;
return 1;
}
/*
* We don't use the contiguous bit in the stage-2 ptes, so skip check
* for misprogramming of the contiguous bit.
*/
if (check_output_size(wi, desc)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
out->upper_attr = desc;
out->desc = desc;
return 1;
}
if (!(desc & BIT(10))) {
out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS);
out->upper_attr = desc;
out->desc = desc;
return 1;
}
addr_bottom += contiguous_bit_shift(desc, wi, level);
/* Calculate and return the result */
paddr = (desc & GENMASK_ULL(47, addr_bottom)) |
(ipa & GENMASK_ULL(addr_bottom - 1, 0));
......@@ -307,7 +290,7 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
out->readable = desc & (0b01 << 6);
out->writable = desc & (0b10 << 6);
out->level = level;
out->upper_attr = desc & GENMASK_ULL(63, 52);
out->desc = desc;
return 0;
}
......@@ -954,19 +937,16 @@ static void set_sysreg_masks(struct kvm *kvm, int sr, u64 res0, u64 res1)
int kvm_init_nv_sysregs(struct kvm *kvm)
{
u64 res0, res1;
int ret = 0;
mutex_lock(&kvm->arch.config_lock);
lockdep_assert_held(&kvm->arch.config_lock);
if (kvm->arch.sysreg_masks)
goto out;
return 0;
kvm->arch.sysreg_masks = kzalloc(sizeof(*(kvm->arch.sysreg_masks)),
GFP_KERNEL_ACCOUNT);
if (!kvm->arch.sysreg_masks) {
ret = -ENOMEM;
goto out;
}
if (!kvm->arch.sysreg_masks)
return -ENOMEM;
limit_nv_id_regs(kvm);
......@@ -1195,8 +1175,13 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, V1P1))
res0 |= ~(res0 | res1);
set_sysreg_masks(kvm, HAFGRTR_EL2, res0, res1);
out:
mutex_unlock(&kvm->arch.config_lock);
return ret;
/* SCTLR_EL1 */
res0 = SCTLR_EL1_RES0;
res1 = SCTLR_EL1_RES1;
if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
res0 |= SCTLR_EL1_EPAN;
set_sysreg_masks(kvm, SCTLR_EL1, res0, res1);
return 0;
}
// SPDX-License-Identifier: GPL-2.0-only
/*
* Debug helper used to dump the stage-2 pagetables of the system and their
* associated permissions.
*
* Copyright (C) Google, 2024
* Author: Sebastian Ene <sebastianene@google.com>
*/
#include <linux/debugfs.h>
#include <linux/kvm_host.h>
#include <linux/seq_file.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/ptdump.h>
#define MARKERS_LEN 2
#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
struct kvm_ptdump_guest_state {
struct kvm *kvm;
struct ptdump_pg_state parser_state;
struct addr_marker ipa_marker[MARKERS_LEN];
struct ptdump_pg_level level[KVM_PGTABLE_MAX_LEVELS];
struct ptdump_range range[MARKERS_LEN];
};
static const struct ptdump_prot_bits stage2_pte_bits[] = {
{
.mask = PTE_VALID,
.val = PTE_VALID,
.set = " ",
.clear = "F",
}, {
.mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
.val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
.set = "R",
.clear = " ",
}, {
.mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
.val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
.set = "W",
.clear = " ",
}, {
.mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
.val = PTE_VALID,
.set = " ",
.clear = "X",
}, {
.mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
.val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
.set = "AF",
.clear = " ",
}, {
.mask = PTE_TABLE_BIT | PTE_VALID,
.val = PTE_VALID,
.set = "BLK",
.clear = " ",
},
};
static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit)
{
struct ptdump_pg_state *st = ctx->arg;
struct ptdump_state *pt_st = &st->ptdump;
note_page(pt_st, ctx->addr, ctx->level, ctx->old);
return 0;
}
static int kvm_ptdump_build_levels(struct ptdump_pg_level *level, u32 start_lvl)
{
u32 i;
u64 mask;
if (WARN_ON_ONCE(start_lvl >= KVM_PGTABLE_LAST_LEVEL))
return -EINVAL;
mask = 0;
for (i = 0; i < ARRAY_SIZE(stage2_pte_bits); i++)
mask |= stage2_pte_bits[i].mask;
for (i = start_lvl; i < KVM_PGTABLE_MAX_LEVELS; i++) {
snprintf(level[i].name, sizeof(level[i].name), "%u", i);
level[i].num = ARRAY_SIZE(stage2_pte_bits);
level[i].bits = stage2_pte_bits;
level[i].mask = mask;
}
return 0;
}
static struct kvm_ptdump_guest_state *kvm_ptdump_parser_create(struct kvm *kvm)
{
struct kvm_ptdump_guest_state *st;
struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
struct kvm_pgtable *pgtable = mmu->pgt;
int ret;
st = kzalloc(sizeof(struct kvm_ptdump_guest_state), GFP_KERNEL_ACCOUNT);
if (!st)
return ERR_PTR(-ENOMEM);
ret = kvm_ptdump_build_levels(&st->level[0], pgtable->start_level);
if (ret) {
kfree(st);
return ERR_PTR(ret);
}
st->ipa_marker[0].name = "Guest IPA";
st->ipa_marker[1].start_address = BIT(pgtable->ia_bits);
st->range[0].end = BIT(pgtable->ia_bits);
st->kvm = kvm;
st->parser_state = (struct ptdump_pg_state) {
.marker = &st->ipa_marker[0],
.level = -1,
.pg_level = &st->level[0],
.ptdump.range = &st->range[0],
.start_address = 0,
};
return st;
}
static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
{
int ret;
struct kvm_ptdump_guest_state *st = m->private;
struct kvm *kvm = st->kvm;
struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
struct ptdump_pg_state *parser_state = &st->parser_state;
struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) {
.cb = kvm_ptdump_visitor,
.arg = parser_state,
.flags = KVM_PGTABLE_WALK_LEAF,
};
parser_state->seq = m;
write_lock(&kvm->mmu_lock);
ret = kvm_pgtable_walk(mmu->pgt, 0, BIT(mmu->pgt->ia_bits), &walker);
write_unlock(&kvm->mmu_lock);
return ret;
}
static int kvm_ptdump_guest_open(struct inode *m, struct file *file)
{
struct kvm *kvm = m->i_private;
struct kvm_ptdump_guest_state *st;
int ret;
if (!kvm_get_kvm_safe(kvm))
return -ENOENT;
st = kvm_ptdump_parser_create(kvm);
if (IS_ERR(st)) {
ret = PTR_ERR(st);
goto err_with_kvm_ref;
}
ret = single_open(file, kvm_ptdump_guest_show, st);
if (!ret)
return 0;
kfree(st);
err_with_kvm_ref:
kvm_put_kvm(kvm);
return ret;
}
static int kvm_ptdump_guest_close(struct inode *m, struct file *file)
{
struct kvm *kvm = m->i_private;
void *st = ((struct seq_file *)file->private_data)->private;
kfree(st);
kvm_put_kvm(kvm);
return single_release(m, file);
}
static const struct file_operations kvm_ptdump_guest_fops = {
.open = kvm_ptdump_guest_open,
.read = seq_read,
.llseek = seq_lseek,
.release = kvm_ptdump_guest_close,
};
static int kvm_pgtable_range_show(struct seq_file *m, void *unused)
{
struct kvm_pgtable *pgtable = m->private;
seq_printf(m, "%2u\n", pgtable->ia_bits);
return 0;
}
static int kvm_pgtable_levels_show(struct seq_file *m, void *unused)
{
struct kvm_pgtable *pgtable = m->private;
seq_printf(m, "%1d\n", KVM_PGTABLE_MAX_LEVELS - pgtable->start_level);
return 0;
}
static int kvm_pgtable_debugfs_open(struct inode *m, struct file *file,
int (*show)(struct seq_file *, void *))
{
struct kvm *kvm = m->i_private;
struct kvm_pgtable *pgtable;
int ret;
if (!kvm_get_kvm_safe(kvm))
return -ENOENT;
pgtable = kvm->arch.mmu.pgt;
ret = single_open(file, show, pgtable);
if (ret < 0)
kvm_put_kvm(kvm);
return ret;
}
static int kvm_pgtable_range_open(struct inode *m, struct file *file)
{
return kvm_pgtable_debugfs_open(m, file, kvm_pgtable_range_show);
}
static int kvm_pgtable_levels_open(struct inode *m, struct file *file)
{
return kvm_pgtable_debugfs_open(m, file, kvm_pgtable_levels_show);
}
static int kvm_pgtable_debugfs_close(struct inode *m, struct file *file)
{
struct kvm *kvm = m->i_private;
kvm_put_kvm(kvm);
return single_release(m, file);
}
static const struct file_operations kvm_pgtable_range_fops = {
.open = kvm_pgtable_range_open,
.read = seq_read,
.llseek = seq_lseek,
.release = kvm_pgtable_debugfs_close,
};
static const struct file_operations kvm_pgtable_levels_fops = {
.open = kvm_pgtable_levels_open,
.read = seq_read,
.llseek = seq_lseek,
.release = kvm_pgtable_debugfs_close,
};
void kvm_s2_ptdump_create_debugfs(struct kvm *kvm)
{
debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry,
kvm, &kvm_ptdump_guest_fops);
debugfs_create_file("ipa_range", 0400, kvm->debugfs_dentry, kvm,
&kvm_pgtable_range_fops);
debugfs_create_file("stage2_levels", 0400, kvm->debugfs_dentry,
kvm, &kvm_pgtable_levels_fops);
}
......@@ -48,6 +48,13 @@ static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
u64 val);
static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
kvm_inject_undefined(vcpu);
return false;
}
static bool bad_trap(struct kvm_vcpu *vcpu,
struct sys_reg_params *params,
const struct sys_reg_desc *r,
......@@ -55,8 +62,7 @@ static bool bad_trap(struct kvm_vcpu *vcpu,
{
WARN_ONCE(1, "Unexpected %s\n", msg);
print_sys_reg_instr(params);
kvm_inject_undefined(vcpu);
return false;
return undef_access(vcpu, params, r);
}
static bool read_from_write_only(struct kvm_vcpu *vcpu,
......@@ -347,10 +353,8 @@ static bool access_dcgsw(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (!kvm_has_mte(vcpu->kvm)) {
kvm_inject_undefined(vcpu);
return false;
}
if (!kvm_has_mte(vcpu->kvm))
return undef_access(vcpu, p, r);
/* Treat MTE S/W ops as we treat the classic ones: with contempt */
return access_dcsw(vcpu, p, r);
......@@ -387,10 +391,8 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
u64 val, mask, shift;
if (reg_to_encoding(r) == SYS_TCR2_EL1 &&
!kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, TCRX, IMP)) {
kvm_inject_undefined(vcpu);
return false;
}
!kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, TCRX, IMP))
return undef_access(vcpu, p, r);
BUG_ON(!p->is_write);
......@@ -437,10 +439,8 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
{
bool g1;
if (!kvm_has_gicv3(vcpu->kvm)) {
kvm_inject_undefined(vcpu);
return false;
}
if (!kvm_has_gicv3(vcpu->kvm))
return undef_access(vcpu, p, r);
if (!p->is_write)
return read_from_write_only(vcpu, p, r);
......@@ -485,6 +485,9 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (!kvm_has_gicv3(vcpu->kvm))
return undef_access(vcpu, p, r);
if (p->is_write)
return ignore_write(vcpu, p);
......@@ -502,14 +505,6 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
return read_zero(vcpu, p);
}
static bool trap_undef(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
kvm_inject_undefined(vcpu);
return false;
}
/*
* ARMv8.1 mandates at least a trivial LORegion implementation, where all the
* RW registers are RES0 (which we can implement as RAZ/WI). On an ARMv8.0
......@@ -522,10 +517,8 @@ static bool trap_loregion(struct kvm_vcpu *vcpu,
{
u32 sr = reg_to_encoding(r);
if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, LO, IMP)) {
kvm_inject_undefined(vcpu);
return false;
}
if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, LO, IMP))
return undef_access(vcpu, p, r);
if (p->is_write && sr == SYS_LORID_EL1)
return write_to_read_only(vcpu, p, r);
......@@ -1258,10 +1251,8 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (p->is_write) {
if (!vcpu_mode_priv(vcpu)) {
kvm_inject_undefined(vcpu);
return false;
}
if (!vcpu_mode_priv(vcpu))
return undef_access(vcpu, p, r);
__vcpu_sys_reg(vcpu, PMUSERENR_EL0) =
p->regval & ARMV8_PMU_USERENR_MASK;
......@@ -1345,14 +1336,6 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
.reset = reset_pmevtyper, \
.access = access_pmu_evtyper, .reg = (PMEVTYPER0_EL0 + n), }
static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
kvm_inject_undefined(vcpu);
return false;
}
/* Macro to expand the AMU counter and type registers*/
#define AMU_AMEVCNTR0_EL0(n) { SYS_DESC(SYS_AMEVCNTR0_EL0(n)), undef_access }
#define AMU_AMEVTYPER0_EL0(n) { SYS_DESC(SYS_AMEVTYPER0_EL0(n)), undef_access }
......@@ -1411,8 +1394,7 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
break;
default:
print_sys_reg_msg(p, "%s", "Unhandled trapped timer register");
kvm_inject_undefined(vcpu);
return false;
return undef_access(vcpu, p, r);
}
if (p->is_write)
......@@ -1546,6 +1528,10 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME);
break;
case SYS_ID_AA64PFR2_EL1:
/* We only expose FPMR */
val &= ID_AA64PFR2_EL1_FPMR;
break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) |
......@@ -1679,6 +1665,24 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,
return REG_HIDDEN;
}
static unsigned int sme_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
if (kvm_has_feat(vcpu->kvm, ID_AA64PFR1_EL1, SME, IMP))
return 0;
return REG_HIDDEN;
}
static unsigned int fp8_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
if (kvm_has_fpmr(vcpu->kvm))
return 0;
return REG_HIDDEN;
}
static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
......@@ -2094,26 +2098,6 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
#define EL2_REG_VNCR(name, rst, v) EL2_REG(name, bad_vncr_trap, rst, v)
#define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v)
/*
* EL{0,1}2 registers are the EL2 view on an EL0 or EL1 register when
* HCR_EL2.E2H==1, and only in the sysreg table for convenience of
* handling traps. Given that, they are always hidden from userspace.
*/
static unsigned int hidden_user_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
return REG_HIDDEN_USER;
}
#define EL12_REG(name, acc, rst, v) { \
SYS_DESC(SYS_##name##_EL12), \
.access = acc, \
.reset = rst, \
.reg = name##_EL1, \
.val = v, \
.visibility = hidden_user_visibility, \
}
/*
* Since reset() callback and field val are not used for idregs, they will be
* used for specific purposes for idregs.
......@@ -2221,6 +2205,18 @@ static bool access_spsr(struct kvm_vcpu *vcpu,
return true;
}
static bool access_cntkctl_el12(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (p->is_write)
__vcpu_sys_reg(vcpu, CNTKCTL_EL1) = p->regval;
else
p->regval = __vcpu_sys_reg(vcpu, CNTKCTL_EL1);
return true;
}
static u64 reset_hcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 val = r->val;
......@@ -2320,7 +2316,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
// DBGDTR[TR]X_EL0 share the same encoding
{ SYS_DESC(SYS_DBGDTRTX_EL0), trap_raz_wi },
{ SYS_DESC(SYS_DBGVCR32_EL2), trap_undef, reset_val, DBGVCR32_EL2, 0 },
{ SYS_DESC(SYS_DBGVCR32_EL2), undef_access, reset_val, DBGVCR32_EL2, 0 },
{ SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 },
......@@ -2378,16 +2374,15 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64PFR0_EL1_MPAM |
ID_AA64PFR0_EL1_SVE |
ID_AA64PFR0_EL1_RAS |
ID_AA64PFR0_EL1_GIC |
ID_AA64PFR0_EL1_AdvSIMD |
ID_AA64PFR0_EL1_FP), },
ID_SANITISED(ID_AA64PFR1_EL1),
ID_UNALLOCATED(4,2),
ID_WRITABLE(ID_AA64PFR2_EL1, ID_AA64PFR2_EL1_FPMR),
ID_UNALLOCATED(4,3),
ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0),
ID_HIDDEN(ID_AA64SMFR0_EL1),
ID_UNALLOCATED(4,6),
ID_UNALLOCATED(4,7),
ID_WRITABLE(ID_AA64FPFR0_EL1, ~ID_AA64FPFR0_EL1_RES0),
/* CRm=5 */
{ SYS_DESC(SYS_ID_AA64DFR0_EL1),
......@@ -2469,6 +2464,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_SPSR_EL1), access_spsr},
{ SYS_DESC(SYS_ELR_EL1), access_elr},
{ SYS_DESC(SYS_ICC_PMR_EL1), undef_access },
{ SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 },
{ SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 },
{ SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 },
......@@ -2525,18 +2522,31 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_VBAR_EL1), access_rw, reset_val, VBAR_EL1, 0 },
{ SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },
{ SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only },
{ SYS_DESC(SYS_ICC_EOIR0_EL1), read_from_write_only },
{ SYS_DESC(SYS_ICC_HPPIR0_EL1), write_to_read_only },
{ SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only },
{ SYS_DESC(SYS_ICC_RPR_EL1), write_to_read_only },
{ SYS_DESC(SYS_ICC_IAR0_EL1), undef_access },
{ SYS_DESC(SYS_ICC_EOIR0_EL1), undef_access },
{ SYS_DESC(SYS_ICC_HPPIR0_EL1), undef_access },
{ SYS_DESC(SYS_ICC_BPR0_EL1), undef_access },
{ SYS_DESC(SYS_ICC_AP0R0_EL1), undef_access },
{ SYS_DESC(SYS_ICC_AP0R1_EL1), undef_access },
{ SYS_DESC(SYS_ICC_AP0R2_EL1), undef_access },
{ SYS_DESC(SYS_ICC_AP0R3_EL1), undef_access },
{ SYS_DESC(SYS_ICC_AP1R0_EL1), undef_access },
{ SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access },
{ SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access },
{ SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access },
{ SYS_DESC(SYS_ICC_DIR_EL1), undef_access },
{ SYS_DESC(SYS_ICC_RPR_EL1), undef_access },
{ SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
{ SYS_DESC(SYS_ICC_ASGI1R_EL1), access_gic_sgi },
{ SYS_DESC(SYS_ICC_SGI0R_EL1), access_gic_sgi },
{ SYS_DESC(SYS_ICC_IAR1_EL1), write_to_read_only },
{ SYS_DESC(SYS_ICC_EOIR1_EL1), read_from_write_only },
{ SYS_DESC(SYS_ICC_HPPIR1_EL1), write_to_read_only },
{ SYS_DESC(SYS_ICC_IAR1_EL1), undef_access },
{ SYS_DESC(SYS_ICC_EOIR1_EL1), undef_access },
{ SYS_DESC(SYS_ICC_HPPIR1_EL1), undef_access },
{ SYS_DESC(SYS_ICC_BPR1_EL1), undef_access },
{ SYS_DESC(SYS_ICC_CTLR_EL1), undef_access },
{ SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre },
{ SYS_DESC(SYS_ICC_IGRPEN0_EL1), undef_access },
{ SYS_DESC(SYS_ICC_IGRPEN1_EL1), undef_access },
{ SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
{ SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 },
......@@ -2557,7 +2567,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
CTR_EL0_IDC_MASK |
CTR_EL0_DminLine_MASK |
CTR_EL0_IminLine_MASK),
{ SYS_DESC(SYS_SVCR), undef_access },
{ SYS_DESC(SYS_SVCR), undef_access, reset_val, SVCR, 0, .visibility = sme_visibility },
{ SYS_DESC(SYS_FPMR), undef_access, reset_val, FPMR, 0, .visibility = fp8_visibility },
{ PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
.reg = PMCR_EL0, .get_user = get_pmcr, .set_user = set_pmcr },
......@@ -2782,7 +2793,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG_VNCR(VTTBR_EL2, reset_val, 0),
EL2_REG_VNCR(VTCR_EL2, reset_val, 0),
{ SYS_DESC(SYS_DACR32_EL2), trap_undef, reset_unknown, DACR32_EL2 },
{ SYS_DESC(SYS_DACR32_EL2), undef_access, reset_unknown, DACR32_EL2 },
EL2_REG_VNCR(HDFGRTR_EL2, reset_val, 0),
EL2_REG_VNCR(HDFGWTR_EL2, reset_val, 0),
EL2_REG_VNCR(HAFGRTR_EL2, reset_val, 0),
......@@ -2791,20 +2802,16 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_SP_EL1), access_sp_el1},
/* AArch32 SPSR_* are RES0 if trapped from a NV guest */
{ SYS_DESC(SYS_SPSR_irq), .access = trap_raz_wi,
.visibility = hidden_user_visibility },
{ SYS_DESC(SYS_SPSR_abt), .access = trap_raz_wi,
.visibility = hidden_user_visibility },
{ SYS_DESC(SYS_SPSR_und), .access = trap_raz_wi,
.visibility = hidden_user_visibility },
{ SYS_DESC(SYS_SPSR_fiq), .access = trap_raz_wi,
.visibility = hidden_user_visibility },
{ SYS_DESC(SYS_IFSR32_EL2), trap_undef, reset_unknown, IFSR32_EL2 },
{ SYS_DESC(SYS_SPSR_irq), .access = trap_raz_wi },
{ SYS_DESC(SYS_SPSR_abt), .access = trap_raz_wi },
{ SYS_DESC(SYS_SPSR_und), .access = trap_raz_wi },
{ SYS_DESC(SYS_SPSR_fiq), .access = trap_raz_wi },
{ SYS_DESC(SYS_IFSR32_EL2), undef_access, reset_unknown, IFSR32_EL2 },
EL2_REG(AFSR0_EL2, access_rw, reset_val, 0),
EL2_REG(AFSR1_EL2, access_rw, reset_val, 0),
EL2_REG_REDIR(ESR_EL2, reset_val, 0),
{ SYS_DESC(SYS_FPEXC32_EL2), trap_undef, reset_val, FPEXC32_EL2, 0x700 },
{ SYS_DESC(SYS_FPEXC32_EL2), undef_access, reset_val, FPEXC32_EL2, 0x700 },
EL2_REG_REDIR(FAR_EL2, reset_val, 0),
EL2_REG(HPFAR_EL2, access_rw, reset_val, 0),
......@@ -2814,7 +2821,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(VBAR_EL2, access_rw, reset_val, 0),
EL2_REG(RVBAR_EL2, access_rw, reset_val, 0),
{ SYS_DESC(SYS_RMR_EL2), trap_undef },
{ SYS_DESC(SYS_RMR_EL2), undef_access },
EL2_REG_VNCR(ICH_HCR_EL2, reset_val, 0),
EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0),
EL2_REG(TPIDR_EL2, access_rw, reset_val, 0),
......@@ -2822,11 +2831,48 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG_VNCR(CNTVOFF_EL2, reset_val, 0),
EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0),
EL12_REG(CNTKCTL, access_rw, reset_val, 0),
{ SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 },
EL2_REG(SP_EL2, NULL, reset_unknown, 0),
};
static bool handle_at_s1e01(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
__kvm_at_s1e01(vcpu, op, p->regval);
return true;
}
static bool handle_at_s1e2(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
/* There is no FGT associated with AT S1E2A :-( */
if (op == OP_AT_S1E2A &&
!kvm_has_feat(vcpu->kvm, ID_AA64ISAR2_EL1, ATS1A, IMP)) {
kvm_inject_undefined(vcpu);
return false;
}
__kvm_at_s1e2(vcpu, op, p->regval);
return true;
}
static bool handle_at_s12(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
__kvm_at_s12(vcpu, op, p->regval);
return true;
}
static bool kvm_supported_tlbi_s12_op(struct kvm_vcpu *vpcu, u32 instr)
{
struct kvm *kvm = vpcu->kvm;
......@@ -2848,10 +2894,8 @@ static bool handle_alle1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
if (!kvm_supported_tlbi_s12_op(vcpu, sys_encoding)) {
kvm_inject_undefined(vcpu);
return false;
}
if (!kvm_supported_tlbi_s12_op(vcpu, sys_encoding))
return undef_access(vcpu, p, r);
write_lock(&vcpu->kvm->mmu_lock);
......@@ -2920,10 +2964,8 @@ static bool handle_vmalls12e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
u64 limit, vttbr;
if (!kvm_supported_tlbi_s12_op(vcpu, sys_encoding)) {
kvm_inject_undefined(vcpu);
return false;
}
if (!kvm_supported_tlbi_s12_op(vcpu, sys_encoding))
return undef_access(vcpu, p, r);
vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
limit = BIT_ULL(kvm_get_pa_bits(vcpu->kvm));
......@@ -2948,10 +2990,8 @@ static bool handle_ripas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
u64 base, range, tg, num, scale;
int shift;
if (!kvm_supported_tlbi_ipas2_op(vcpu, sys_encoding)) {
kvm_inject_undefined(vcpu);
return false;
}
if (!kvm_supported_tlbi_ipas2_op(vcpu, sys_encoding))
return undef_access(vcpu, p, r);
/*
* Because the shadow S2 structure doesn't necessarily reflect that
......@@ -3019,10 +3059,8 @@ static bool handle_ipas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
u64 vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
if (!kvm_supported_tlbi_ipas2_op(vcpu, sys_encoding)) {
kvm_inject_undefined(vcpu);
return false;
}
if (!kvm_supported_tlbi_ipas2_op(vcpu, sys_encoding))
return undef_access(vcpu, p, r);
kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, get_vmid(vttbr),
&(union tlbi_info) {
......@@ -3062,10 +3100,8 @@ static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
WARN_ON(!vcpu_is_el2(vcpu));
if (!kvm_supported_tlbi_s1e1_op(vcpu, sys_encoding)) {
kvm_inject_undefined(vcpu);
return false;
}
if (!kvm_supported_tlbi_s1e1_op(vcpu, sys_encoding))
return undef_access(vcpu, p, r);
kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, get_vmid(vttbr),
&(union tlbi_info) {
......@@ -3089,6 +3125,14 @@ static struct sys_reg_desc sys_insn_descs[] = {
{ SYS_DESC(SYS_DC_ISW), access_dcsw },
{ SYS_DESC(SYS_DC_IGSW), access_dcgsw },
{ SYS_DESC(SYS_DC_IGDSW), access_dcgsw },
SYS_INSN(AT_S1E1R, handle_at_s1e01),
SYS_INSN(AT_S1E1W, handle_at_s1e01),
SYS_INSN(AT_S1E0R, handle_at_s1e01),
SYS_INSN(AT_S1E0W, handle_at_s1e01),
SYS_INSN(AT_S1E1RP, handle_at_s1e01),
SYS_INSN(AT_S1E1WP, handle_at_s1e01),
{ SYS_DESC(SYS_DC_CSW), access_dcsw },
{ SYS_DESC(SYS_DC_CGSW), access_dcgsw },
{ SYS_DESC(SYS_DC_CGDSW), access_dcgsw },
......@@ -3168,19 +3212,27 @@ static struct sys_reg_desc sys_insn_descs[] = {
SYS_INSN(TLBI_VALE1NXS, handle_tlbi_el1),
SYS_INSN(TLBI_VAALE1NXS, handle_tlbi_el1),
SYS_INSN(AT_S1E2R, handle_at_s1e2),
SYS_INSN(AT_S1E2W, handle_at_s1e2),
SYS_INSN(AT_S12E1R, handle_at_s12),
SYS_INSN(AT_S12E1W, handle_at_s12),
SYS_INSN(AT_S12E0R, handle_at_s12),
SYS_INSN(AT_S12E0W, handle_at_s12),
SYS_INSN(AT_S1E2A, handle_at_s1e2),
SYS_INSN(TLBI_IPAS2E1IS, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2E1IS, handle_ripas2e1is),
SYS_INSN(TLBI_IPAS2LE1IS, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2LE1IS, handle_ripas2e1is),
SYS_INSN(TLBI_ALLE2OS, trap_undef),
SYS_INSN(TLBI_VAE2OS, trap_undef),
SYS_INSN(TLBI_ALLE2OS, undef_access),
SYS_INSN(TLBI_VAE2OS, undef_access),
SYS_INSN(TLBI_ALLE1OS, handle_alle1is),
SYS_INSN(TLBI_VALE2OS, trap_undef),
SYS_INSN(TLBI_VALE2OS, undef_access),
SYS_INSN(TLBI_VMALLS12E1OS, handle_vmalls12e1is),
SYS_INSN(TLBI_RVAE2IS, trap_undef),
SYS_INSN(TLBI_RVALE2IS, trap_undef),
SYS_INSN(TLBI_RVAE2IS, undef_access),
SYS_INSN(TLBI_RVALE2IS, undef_access),
SYS_INSN(TLBI_ALLE1IS, handle_alle1is),
SYS_INSN(TLBI_VMALLS12E1IS, handle_vmalls12e1is),
......@@ -3192,10 +3244,10 @@ static struct sys_reg_desc sys_insn_descs[] = {
SYS_INSN(TLBI_IPAS2LE1, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2LE1, handle_ripas2e1is),
SYS_INSN(TLBI_RIPAS2LE1OS, handle_ripas2e1is),
SYS_INSN(TLBI_RVAE2OS, trap_undef),
SYS_INSN(TLBI_RVALE2OS, trap_undef),
SYS_INSN(TLBI_RVAE2, trap_undef),
SYS_INSN(TLBI_RVALE2, trap_undef),
SYS_INSN(TLBI_RVAE2OS, undef_access),
SYS_INSN(TLBI_RVALE2OS, undef_access),
SYS_INSN(TLBI_RVAE2, undef_access),
SYS_INSN(TLBI_RVALE2, undef_access),
SYS_INSN(TLBI_ALLE1, handle_alle1is),
SYS_INSN(TLBI_VMALLS12E1, handle_vmalls12e1is),
......@@ -3204,19 +3256,19 @@ static struct sys_reg_desc sys_insn_descs[] = {
SYS_INSN(TLBI_IPAS2LE1ISNXS, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2LE1ISNXS, handle_ripas2e1is),
SYS_INSN(TLBI_ALLE2OSNXS, trap_undef),
SYS_INSN(TLBI_VAE2OSNXS, trap_undef),
SYS_INSN(TLBI_ALLE2OSNXS, undef_access),
SYS_INSN(TLBI_VAE2OSNXS, undef_access),
SYS_INSN(TLBI_ALLE1OSNXS, handle_alle1is),
SYS_INSN(TLBI_VALE2OSNXS, trap_undef),
SYS_INSN(TLBI_VALE2OSNXS, undef_access),
SYS_INSN(TLBI_VMALLS12E1OSNXS, handle_vmalls12e1is),
SYS_INSN(TLBI_RVAE2ISNXS, trap_undef),
SYS_INSN(TLBI_RVALE2ISNXS, trap_undef),
SYS_INSN(TLBI_ALLE2ISNXS, trap_undef),
SYS_INSN(TLBI_VAE2ISNXS, trap_undef),
SYS_INSN(TLBI_RVAE2ISNXS, undef_access),
SYS_INSN(TLBI_RVALE2ISNXS, undef_access),
SYS_INSN(TLBI_ALLE2ISNXS, undef_access),
SYS_INSN(TLBI_VAE2ISNXS, undef_access),
SYS_INSN(TLBI_ALLE1ISNXS, handle_alle1is),
SYS_INSN(TLBI_VALE2ISNXS, trap_undef),
SYS_INSN(TLBI_VALE2ISNXS, undef_access),
SYS_INSN(TLBI_VMALLS12E1ISNXS, handle_vmalls12e1is),
SYS_INSN(TLBI_IPAS2E1OSNXS, handle_ipas2e1is),
SYS_INSN(TLBI_IPAS2E1NXS, handle_ipas2e1is),
......@@ -3226,14 +3278,14 @@ static struct sys_reg_desc sys_insn_descs[] = {
SYS_INSN(TLBI_IPAS2LE1NXS, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2LE1NXS, handle_ripas2e1is),
SYS_INSN(TLBI_RIPAS2LE1OSNXS, handle_ripas2e1is),
SYS_INSN(TLBI_RVAE2OSNXS, trap_undef),
SYS_INSN(TLBI_RVALE2OSNXS, trap_undef),
SYS_INSN(TLBI_RVAE2NXS, trap_undef),
SYS_INSN(TLBI_RVALE2NXS, trap_undef),
SYS_INSN(TLBI_ALLE2NXS, trap_undef),
SYS_INSN(TLBI_VAE2NXS, trap_undef),
SYS_INSN(TLBI_RVAE2OSNXS, undef_access),
SYS_INSN(TLBI_RVALE2OSNXS, undef_access),
SYS_INSN(TLBI_RVAE2NXS, undef_access),
SYS_INSN(TLBI_RVALE2NXS, undef_access),
SYS_INSN(TLBI_ALLE2NXS, undef_access),
SYS_INSN(TLBI_VAE2NXS, undef_access),
SYS_INSN(TLBI_ALLE1NXS, handle_alle1is),
SYS_INSN(TLBI_VALE2NXS, trap_undef),
SYS_INSN(TLBI_VALE2NXS, undef_access),
SYS_INSN(TLBI_VMALLS12E1NXS, handle_vmalls12e1is),
};
......@@ -3411,6 +3463,7 @@ static const struct sys_reg_desc cp15_regs[] = {
/* TTBCR2 */
{ AA32(HI), Op1( 0), CRn( 2), CRm( 0), Op2( 3), access_vm_reg, NULL, TCR_EL1 },
{ Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, DACR32_EL2 },
{ CP15_SYS_DESC(SYS_ICC_PMR_EL1), undef_access },
/* DFSR */
{ Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, ESR_EL1 },
{ Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, IFSR32_EL2 },
......@@ -3460,8 +3513,28 @@ static const struct sys_reg_desc cp15_regs[] = {
/* AMAIR1 */
{ AA32(HI), Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, AMAIR_EL1 },
/* ICC_SRE */
{ Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre },
{ CP15_SYS_DESC(SYS_ICC_IAR0_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_EOIR0_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_HPPIR0_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_BPR0_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_AP0R0_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_AP0R1_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_AP0R2_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_AP0R3_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_AP1R0_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_DIR_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_RPR_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_IAR1_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_EOIR1_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_HPPIR1_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_BPR1_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_CTLR_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre },
{ CP15_SYS_DESC(SYS_ICC_IGRPEN0_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_IGRPEN1_EL1), undef_access },
{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, CONTEXTIDR_EL1 },
......@@ -4298,7 +4371,7 @@ int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
int ret;
r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
if (!r || sysreg_hidden_user(vcpu, r))
if (!r || sysreg_hidden(vcpu, r))
return -ENOENT;
if (r->get_user) {
......@@ -4342,7 +4415,7 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
return -EFAULT;
r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
if (!r || sysreg_hidden_user(vcpu, r))
if (!r || sysreg_hidden(vcpu, r))
return -ENOENT;
if (sysreg_user_write_ignore(vcpu, r))
......@@ -4428,7 +4501,7 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu,
if (!(rd->reg || rd->get_user))
return 0;
if (sysreg_hidden_user(vcpu, rd))
if (sysreg_hidden(vcpu, rd))
return 0;
if (!copy_reg_to_user(rd, uind))
......@@ -4569,6 +4642,7 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
mutex_lock(&kvm->arch.config_lock);
vcpu_set_hcr(vcpu);
vcpu_set_ich_hcr(vcpu);
if (cpus_have_final_cap(ARM64_HAS_HCX)) {
/*
......@@ -4584,6 +4658,9 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
if (kvm_has_feat(kvm, ID_AA64MMFR3_EL1, TCRX, IMP))
vcpu->arch.hcrx_el2 |= HCRX_EL2_TCR2En;
if (kvm_has_fpmr(kvm))
vcpu->arch.hcrx_el2 |= HCRX_EL2_EnFPM;
}
if (test_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags))
......@@ -4622,6 +4699,13 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
HFGITR_EL2_TLBIRVAAE1OS |
HFGITR_EL2_TLBIRVAE1OS);
if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, ATS1A, IMP))
kvm->arch.fgu[HFGITR_GROUP] |= HFGITR_EL2_ATS1E1A;
if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, PAN, PAN2))
kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_ATS1E1RP |
HFGITR_EL2_ATS1E1WP);
if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP))
kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPIRE0_EL1 |
HFGxTR_EL2_nPIR_EL1);
......@@ -4639,6 +4723,36 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
mutex_unlock(&kvm->arch.config_lock);
}
/*
* Perform last adjustments to the ID registers that are implied by the
* configuration outside of the ID regs themselves, as well as any
* initialisation that directly depend on these ID registers (such as
* RES0/RES1 behaviours). This is not the place to configure traps though.
*
* Because this can be called once per CPU, changes must be idempotent.
*/
int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
guard(mutex)(&kvm->arch.config_lock);
if (!(static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) &&
irqchip_in_kernel(kvm) &&
kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)) {
kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64PFR0_EL1)] &= ~ID_AA64PFR0_EL1_GIC_MASK;
kvm->arch.id_regs[IDREG_IDX(SYS_ID_PFR1_EL1)] &= ~ID_PFR1_EL1_GIC_MASK;
}
if (vcpu_has_nv(vcpu)) {
int ret = kvm_init_nv_sysregs(kvm);
if (ret)
return ret;
}
return 0;
}
int __init kvm_sys_reg_table_init(void)
{
bool valid = true;
......
......@@ -95,9 +95,8 @@ struct sys_reg_desc {
};
#define REG_HIDDEN (1 << 0) /* hidden from userspace and guest */
#define REG_HIDDEN_USER (1 << 1) /* hidden from userspace only */
#define REG_RAZ (1 << 2) /* RAZ from userspace and guest */
#define REG_USER_WI (1 << 3) /* WI from userspace only */
#define REG_RAZ (1 << 1) /* RAZ from userspace and guest */
#define REG_USER_WI (1 << 2) /* WI from userspace only */
static __printf(2, 3)
inline void print_sys_reg_msg(const struct sys_reg_params *p,
......@@ -165,15 +164,6 @@ static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu,
return sysreg_visibility(vcpu, r) & REG_HIDDEN;
}
static inline bool sysreg_hidden_user(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
if (likely(!r->visibility))
return false;
return r->visibility(vcpu, r) & (REG_HIDDEN | REG_HIDDEN_USER);
}
static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
......@@ -235,6 +225,8 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index);
int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu);
#define AA32(_x) .aarch32_map = AA32_##_x
#define Op0(_x) .Op0 = _x
#define Op1(_x) .Op1 = _x
......@@ -248,4 +240,11 @@ bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index);
CRn(sys_reg_CRn(reg)), CRm(sys_reg_CRm(reg)), \
Op2(sys_reg_Op2(reg))
#define CP15_SYS_DESC(reg) \
.name = #reg, \
.aarch32_map = AA32_DIRECT, \
Op0(0), Op1(sys_reg_Op1(reg)), \
CRn(sys_reg_CRn(reg)), CRm(sys_reg_CRm(reg)), \
Op2(sys_reg_Op2(reg))
#endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */
......@@ -292,6 +292,18 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
/* Get the show on the road... */
vgic_v3->vgic_hcr = ICH_HCR_EN;
}
void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
/* Hide GICv3 sysreg if necessary */
if (!kvm_has_gicv3(vcpu->kvm)) {
vgic_v3->vgic_hcr |= ICH_HCR_TALL0 | ICH_HCR_TALL1 | ICH_HCR_TC;
return;
}
if (group0_trap)
vgic_v3->vgic_hcr |= ICH_HCR_TALL0;
if (group1_trap)
......
......@@ -922,10 +922,13 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
void kvm_vgic_load(struct kvm_vcpu *vcpu)
{
if (unlikely(!vgic_initialized(vcpu->kvm)))
if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
__vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
return;
}
if (kvm_vgic_global_state.type == VGIC_V2)
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
vgic_v2_load(vcpu);
else
vgic_v3_load(vcpu);
......@@ -933,10 +936,13 @@ void kvm_vgic_load(struct kvm_vcpu *vcpu)
void kvm_vgic_put(struct kvm_vcpu *vcpu)
{
if (unlikely(!vgic_initialized(vcpu->kvm)))
if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
__vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
return;
}
if (kvm_vgic_global_state.type == VGIC_V2)
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
vgic_v2_put(vcpu);
else
vgic_v3_put(vcpu);
......
......@@ -346,11 +346,11 @@ void vgic_v4_configure_vsgis(struct kvm *kvm);
void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val);
int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq);
void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu);
static inline bool kvm_has_gicv3(struct kvm *kvm)
{
return (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) &&
irqchip_in_kernel(kvm) &&
kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3);
return kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP);
}
#endif
......@@ -38,33 +38,7 @@
seq_printf(m, fmt); \
})
/*
* The page dumper groups page table entries of the same type into a single
* description. It uses pg_state to track the range information while
* iterating over the pte entries. When the continuity is broken it then
* dumps out a description of the range.
*/
struct pg_state {
struct ptdump_state ptdump;
struct seq_file *seq;
const struct addr_marker *marker;
const struct mm_struct *mm;
unsigned long start_address;
int level;
u64 current_prot;
bool check_wx;
unsigned long wx_pages;
unsigned long uxn_pages;
};
struct prot_bits {
u64 mask;
u64 val;
const char *set;
const char *clear;
};
static const struct prot_bits pte_bits[] = {
static const struct ptdump_prot_bits pte_bits[] = {
{
.mask = PTE_VALID,
.val = PTE_VALID,
......@@ -143,14 +117,7 @@ static const struct prot_bits pte_bits[] = {
}
};
struct pg_level {
const struct prot_bits *bits;
char name[4];
int num;
u64 mask;
};
static struct pg_level pg_level[] __ro_after_init = {
static struct ptdump_pg_level kernel_pg_levels[] __ro_after_init = {
{ /* pgd */
.name = "PGD",
.bits = pte_bits,
......@@ -174,7 +141,7 @@ static struct pg_level pg_level[] __ro_after_init = {
},
};
static void dump_prot(struct pg_state *st, const struct prot_bits *bits,
static void dump_prot(struct ptdump_pg_state *st, const struct ptdump_prot_bits *bits,
size_t num)
{
unsigned i;
......@@ -192,7 +159,7 @@ static void dump_prot(struct pg_state *st, const struct prot_bits *bits,
}
}
static void note_prot_uxn(struct pg_state *st, unsigned long addr)
static void note_prot_uxn(struct ptdump_pg_state *st, unsigned long addr)
{
if (!st->check_wx)
return;
......@@ -206,7 +173,7 @@ static void note_prot_uxn(struct pg_state *st, unsigned long addr)
st->uxn_pages += (addr - st->start_address) / PAGE_SIZE;
}
static void note_prot_wx(struct pg_state *st, unsigned long addr)
static void note_prot_wx(struct ptdump_pg_state *st, unsigned long addr)
{
if (!st->check_wx)
return;
......@@ -221,16 +188,17 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
}
static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
u64 val)
{
struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
struct ptdump_pg_state *st = container_of(pt_st, struct ptdump_pg_state, ptdump);
struct ptdump_pg_level *pg_level = st->pg_level;
static const char units[] = "KMGTPE";
u64 prot = 0;
/* check if the current level has been folded dynamically */
if ((level == 1 && mm_p4d_folded(st->mm)) ||
(level == 2 && mm_pud_folded(st->mm)))
if (st->mm && ((level == 1 && mm_p4d_folded(st->mm)) ||
(level == 2 && mm_pud_folded(st->mm))))
level = 0;
if (level >= 0)
......@@ -286,15 +254,16 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
{
unsigned long end = ~0UL;
struct pg_state st;
struct ptdump_pg_state st;
if (info->base_addr < TASK_SIZE_64)
end = TASK_SIZE_64;
st = (struct pg_state){
st = (struct ptdump_pg_state){
.seq = s,
.marker = info->markers,
.mm = info->mm,
.pg_level = &kernel_pg_levels[0],
.level = -1,
.ptdump = {
.note_page = note_page,
......@@ -312,10 +281,10 @@ static void __init ptdump_initialize(void)
{
unsigned i, j;
for (i = 0; i < ARRAY_SIZE(pg_level); i++)
if (pg_level[i].bits)
for (j = 0; j < pg_level[i].num; j++)
pg_level[i].mask |= pg_level[i].bits[j].mask;
for (i = 0; i < ARRAY_SIZE(kernel_pg_levels); i++)
if (kernel_pg_levels[i].bits)
for (j = 0; j < kernel_pg_levels[i].num; j++)
kernel_pg_levels[i].mask |= kernel_pg_levels[i].bits[j].mask;
}
static struct ptdump_info kernel_ptdump_info __ro_after_init = {
......@@ -324,12 +293,13 @@ static struct ptdump_info kernel_ptdump_info __ro_after_init = {
bool ptdump_check_wx(void)
{
struct pg_state st = {
struct ptdump_pg_state st = {
.seq = NULL,
.marker = (struct addr_marker[]) {
{ 0, NULL},
{ -1, NULL},
},
.pg_level = &kernel_pg_levels[0],
.level = -1,
.check_wx = true,
.ptdump = {
......
......@@ -6,7 +6,6 @@ generic-y += mcs_spinlock.h
generic-y += parport.h
generic-y += early_ioremap.h
generic-y += qrwlock.h
generic-y += qspinlock.h
generic-y += user.h
generic-y += ioctl.h
generic-y += statfs.h
......
......@@ -30,6 +30,7 @@
: [val] "+r" (__v) \
: [reg] "i" (csr) \
: "memory"); \
__v; \
})
#define gcsr_xchg(v, m, csr) \
......@@ -181,6 +182,8 @@ __BUILD_GCSR_OP(tlbidx)
#define kvm_save_hw_gcsr(csr, gid) (csr->csrs[gid] = gcsr_read(gid))
#define kvm_restore_hw_gcsr(csr, gid) (gcsr_write(csr->csrs[gid], gid))
#define kvm_read_clear_hw_gcsr(csr, gid) (csr->csrs[gid] = gcsr_write(0, gid))
int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu);
static __always_inline unsigned long kvm_read_sw_gcsr(struct loongarch_csrs *csr, int gid)
......@@ -208,4 +211,7 @@ static __always_inline void kvm_change_sw_gcsr(struct loongarch_csrs *csr,
csr->csrs[gid] |= val & _mask;
}
#define KVM_PMU_EVENT_ENABLED (CSR_PERFCTRL_PLV0 | CSR_PERFCTRL_PLV1 | \
CSR_PERFCTRL_PLV2 | CSR_PERFCTRL_PLV3)
#endif /* __ASM_LOONGARCH_KVM_CSR_H__ */
......@@ -30,6 +30,7 @@
#define KVM_HALT_POLL_NS_DEFAULT 500000
#define KVM_REQ_TLB_FLUSH_GPA KVM_ARCH_REQ(0)
#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(1)
#define KVM_REQ_PMU KVM_ARCH_REQ(2)
#define KVM_GUESTDBG_SW_BP_MASK \
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
......@@ -60,9 +61,13 @@ struct kvm_arch_memory_slot {
unsigned long flags;
};
#define HOST_MAX_PMNUM 16
struct kvm_context {
unsigned long vpid_cache;
struct kvm_vcpu *last_vcpu;
/* Host PMU CSR */
u64 perf_ctrl[HOST_MAX_PMNUM];
u64 perf_cntr[HOST_MAX_PMNUM];
};
struct kvm_world_switch {
......@@ -107,6 +112,8 @@ struct kvm_arch {
unsigned int root_level;
spinlock_t phyid_map_lock;
struct kvm_phyid_map *phyid_map;
/* Enabled PV features */
unsigned long pv_features;
s64 time_offset;
struct kvm_context __percpu *vmcs;
......@@ -133,8 +140,15 @@ enum emulation_result {
#define KVM_LARCH_FPU (0x1 << 0)
#define KVM_LARCH_LSX (0x1 << 1)
#define KVM_LARCH_LASX (0x1 << 2)
#define KVM_LARCH_SWCSR_LATEST (0x1 << 3)
#define KVM_LARCH_HWCSR_USABLE (0x1 << 4)
#define KVM_LARCH_LBT (0x1 << 3)
#define KVM_LARCH_PMU (0x1 << 4)
#define KVM_LARCH_SWCSR_LATEST (0x1 << 5)
#define KVM_LARCH_HWCSR_USABLE (0x1 << 6)
#define LOONGARCH_PV_FEAT_UPDATED BIT_ULL(63)
#define LOONGARCH_PV_FEAT_MASK (BIT(KVM_FEATURE_IPI) | \
BIT(KVM_FEATURE_STEAL_TIME) | \
BIT(KVM_FEATURE_VIRT_EXTIOI))
struct kvm_vcpu_arch {
/*
......@@ -168,10 +182,14 @@ struct kvm_vcpu_arch {
/* FPU state */
struct loongarch_fpu fpu FPU_ALIGN;
struct loongarch_lbt lbt;
/* CSR state */
struct loongarch_csrs *csr;
/* Guest max PMU CSR id */
int max_pmu_csrid;
/* GPR used as IO source/target */
u32 io_gpr;
......@@ -239,6 +257,21 @@ static inline bool kvm_guest_has_lasx(struct kvm_vcpu_arch *arch)
return arch->cpucfg[2] & CPUCFG2_LASX;
}
static inline bool kvm_guest_has_lbt(struct kvm_vcpu_arch *arch)
{
return arch->cpucfg[2] & (CPUCFG2_X86BT | CPUCFG2_ARMBT | CPUCFG2_MIPSBT);
}
static inline bool kvm_guest_has_pmu(struct kvm_vcpu_arch *arch)
{
return arch->cpucfg[6] & CPUCFG6_PMP;
}
static inline int kvm_get_pmu_num(struct kvm_vcpu_arch *arch)
{
return (arch->cpucfg[6] & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT;
}
/* Debug: dump vcpu state */
int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu);
......
......@@ -2,6 +2,8 @@
#ifndef _ASM_LOONGARCH_KVM_PARA_H
#define _ASM_LOONGARCH_KVM_PARA_H
#include <uapi/asm/kvm_para.h>
/*
* Hypercall code field
*/
......@@ -154,10 +156,20 @@ static __always_inline long kvm_hypercall5(u64 fid,
return ret;
}
#ifdef CONFIG_PARAVIRT
bool kvm_para_available(void);
unsigned int kvm_arch_para_features(void);
#else
static inline bool kvm_para_available(void)
{
return false;
}
static inline unsigned int kvm_arch_para_features(void)
{
return 0;
}
#endif
static inline unsigned int kvm_arch_para_hints(void)
{
......
......@@ -75,6 +75,12 @@ static inline void kvm_save_lasx(struct loongarch_fpu *fpu) { }
static inline void kvm_restore_lasx(struct loongarch_fpu *fpu) { }
#endif
#ifdef CONFIG_CPU_HAS_LBT
int kvm_own_lbt(struct kvm_vcpu *vcpu);
#else
static inline int kvm_own_lbt(struct kvm_vcpu *vcpu) { return -EINVAL; }
#endif
void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long hz);
void kvm_save_timer(struct kvm_vcpu *vcpu);
void kvm_restore_timer(struct kvm_vcpu *vcpu);
......@@ -124,4 +130,9 @@ static inline bool kvm_pvtime_supported(void)
return !!sched_info_on();
}
static inline bool kvm_guest_has_pv_feature(struct kvm_vcpu *vcpu, unsigned int feature)
{
return vcpu->kvm->arch.pv_features & BIT(feature);
}
#endif /* __ASM_LOONGARCH_KVM_VCPU_H__ */
......@@ -119,6 +119,7 @@
#define CPUCFG6_PMP BIT(0)
#define CPUCFG6_PAMVER GENMASK(3, 1)
#define CPUCFG6_PMNUM GENMASK(7, 4)
#define CPUCFG6_PMNUM_SHIFT 4
#define CPUCFG6_PMBITS GENMASK(13, 8)
#define CPUCFG6_UPM BIT(14)
......@@ -160,16 +161,8 @@
/*
* CPUCFG index area: 0x40000000 -- 0x400000ff
* SW emulation for KVM hypervirsor
* SW emulation for KVM hypervirsor, see arch/loongarch/include/uapi/asm/kvm_para.h
*/
#define CPUCFG_KVM_BASE 0x40000000
#define CPUCFG_KVM_SIZE 0x100
#define CPUCFG_KVM_SIG (CPUCFG_KVM_BASE + 0)
#define KVM_SIGNATURE "KVM\0"
#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
#define KVM_FEATURE_IPI BIT(1)
#define KVM_FEATURE_STEAL_TIME BIT(2)
#ifndef __ASSEMBLY__
......
......@@ -19,6 +19,7 @@ static inline u64 paravirt_steal_clock(int cpu)
int __init pv_ipi_init(void);
int __init pv_time_init(void);
int __init pv_spinlock_init(void);
#else
......@@ -31,5 +32,11 @@ static inline int pv_time_init(void)
{
return 0;
}
static inline int pv_spinlock_init(void)
{
return 0;
}
#endif // CONFIG_PARAVIRT
#endif
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_LOONGARCH_QSPINLOCK_H
#define _ASM_LOONGARCH_QSPINLOCK_H
#include <linux/jump_label.h>
#ifdef CONFIG_PARAVIRT
DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);
#define virt_spin_lock virt_spin_lock
static inline bool virt_spin_lock(struct qspinlock *lock)
{
int val;
if (!static_branch_unlikely(&virt_spin_lock_key))
return false;
/*
* On hypervisors without PARAVIRT_SPINLOCKS support we fall
* back to a Test-and-Set spinlock, because fair locks have
* horrible lock 'holder' preemption issues.
*/
__retry:
val = atomic_read(&lock->val);
if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) {
cpu_relax();
goto __retry;
}
return true;
}
#endif /* CONFIG_PARAVIRT */
#include <asm-generic/qspinlock.h>
#endif // _ASM_LOONGARCH_QSPINLOCK_H
# SPDX-License-Identifier: GPL-2.0
syscall-y += unistd_64.h
generic-y += kvm_para.h
......@@ -64,6 +64,7 @@ struct kvm_fpu {
#define KVM_REG_LOONGARCH_KVM (KVM_REG_LOONGARCH | 0x20000ULL)
#define KVM_REG_LOONGARCH_FPSIMD (KVM_REG_LOONGARCH | 0x30000ULL)
#define KVM_REG_LOONGARCH_CPUCFG (KVM_REG_LOONGARCH | 0x40000ULL)
#define KVM_REG_LOONGARCH_LBT (KVM_REG_LOONGARCH | 0x50000ULL)
#define KVM_REG_LOONGARCH_MASK (KVM_REG_LOONGARCH | 0x70000ULL)
#define KVM_CSR_IDX_MASK 0x7fff
#define KVM_CPUCFG_IDX_MASK 0x7fff
......@@ -77,11 +78,30 @@ struct kvm_fpu {
/* Debugging: Special instruction for software breakpoint */
#define KVM_REG_LOONGARCH_DEBUG_INST (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3)
/* LBT registers */
#define KVM_REG_LOONGARCH_LBT_SCR0 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 1)
#define KVM_REG_LOONGARCH_LBT_SCR1 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 2)
#define KVM_REG_LOONGARCH_LBT_SCR2 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 3)
#define KVM_REG_LOONGARCH_LBT_SCR3 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 4)
#define KVM_REG_LOONGARCH_LBT_EFLAGS (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 5)
#define KVM_REG_LOONGARCH_LBT_FTOP (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 6)
#define LOONGARCH_REG_SHIFT 3
#define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT))
#define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG)
#define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG)
/* Device Control API on vm fd */
#define KVM_LOONGARCH_VM_FEAT_CTRL 0
#define KVM_LOONGARCH_VM_FEAT_LSX 0
#define KVM_LOONGARCH_VM_FEAT_LASX 1
#define KVM_LOONGARCH_VM_FEAT_X86BT 2
#define KVM_LOONGARCH_VM_FEAT_ARMBT 3
#define KVM_LOONGARCH_VM_FEAT_MIPSBT 4
#define KVM_LOONGARCH_VM_FEAT_PMU 5
#define KVM_LOONGARCH_VM_FEAT_PV_IPI 6
#define KVM_LOONGARCH_VM_FEAT_PV_STEALTIME 7
/* Device Control API on vcpu fd */
#define KVM_LOONGARCH_VCPU_CPUCFG 0
#define KVM_LOONGARCH_VCPU_PVTIME_CTRL 1
......
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI_ASM_KVM_PARA_H
#define _UAPI_ASM_KVM_PARA_H
#include <linux/types.h>
/*
* CPUCFG index area: 0x40000000 -- 0x400000ff
* SW emulation for KVM hypervirsor
*/
#define CPUCFG_KVM_BASE 0x40000000
#define CPUCFG_KVM_SIZE 0x100
#define CPUCFG_KVM_SIG (CPUCFG_KVM_BASE + 0)
#define KVM_SIGNATURE "KVM\0"
#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
#define KVM_FEATURE_IPI 1
#define KVM_FEATURE_STEAL_TIME 2
/* BIT 24 - 31 are features configurable by user space vmm */
#define KVM_FEATURE_VIRT_EXTIOI 24
#endif /* _UAPI_ASM_KVM_PARA_H */
......@@ -13,6 +13,7 @@ static int has_steal_clock;
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key);
static u64 native_steal_clock(int cpu)
{
......@@ -151,11 +152,14 @@ static void pv_init_ipi(void)
}
#endif
static bool kvm_para_available(void)
bool kvm_para_available(void)
{
int config;
static int hypervisor_type;
if (!cpu_has_hypervisor)
return false;
if (!hypervisor_type) {
config = read_cpucfg(CPUCFG_KVM_SIG);
if (!memcmp(&config, KVM_SIGNATURE, 4))
......@@ -165,17 +169,22 @@ static bool kvm_para_available(void)
return hypervisor_type == HYPERVISOR_KVM;
}
int __init pv_ipi_init(void)
unsigned int kvm_arch_para_features(void)
{
int feature;
static unsigned int feature;
if (!cpu_has_hypervisor)
return 0;
if (!kvm_para_available())
return 0;
if (!feature)
feature = read_cpucfg(CPUCFG_KVM_FEATURE);
if (!(feature & KVM_FEATURE_IPI))
return feature;
}
int __init pv_ipi_init(void)
{
if (!kvm_para_has_feature(KVM_FEATURE_IPI))
return 0;
#ifdef CONFIG_SMP
......@@ -206,7 +215,7 @@ static int pv_enable_steal_time(void)
}
addr |= KVM_STEAL_PHYS_VALID;
kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, addr);
kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, BIT(KVM_FEATURE_STEAL_TIME), addr);
return 0;
}
......@@ -214,7 +223,7 @@ static int pv_enable_steal_time(void)
static void pv_disable_steal_time(void)
{
if (has_steal_clock)
kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, 0);
kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, BIT(KVM_FEATURE_STEAL_TIME), 0);
}
#ifdef CONFIG_SMP
......@@ -258,15 +267,9 @@ static struct notifier_block pv_reboot_nb = {
int __init pv_time_init(void)
{
int r, feature;
if (!cpu_has_hypervisor)
return 0;
if (!kvm_para_available())
return 0;
int r;
feature = read_cpucfg(CPUCFG_KVM_FEATURE);
if (!(feature & KVM_FEATURE_STEAL_TIME))
if (!kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
return 0;
has_steal_clock = 1;
......@@ -300,3 +303,13 @@ int __init pv_time_init(void)
return 0;
}
int __init pv_spinlock_init(void)
{
if (!cpu_has_hypervisor)
return 0;
static_branch_enable(&virt_spin_lock_key);
return 0;
}
......@@ -603,6 +603,8 @@ void __init setup_arch(char **cmdline_p)
arch_mem_init(cmdline_p);
resource_init();
jump_label_init(); /* Initialise the static keys for paravirtualization */
#ifdef CONFIG_SMP
plat_smp_setup();
prefill_possible_map();
......
......@@ -476,7 +476,7 @@ core_initcall(ipi_pm_init);
#endif
/* Preload SMP state for boot cpu */
void smp_prepare_boot_cpu(void)
void __init smp_prepare_boot_cpu(void)
{
unsigned int cpu, node, rr_node;
......@@ -509,6 +509,8 @@ void smp_prepare_boot_cpu(void)
rr_node = next_node_in(rr_node, node_online_map);
}
}
pv_spinlock_init();
}
/* called from main before smp_init() */
......
......@@ -50,9 +50,7 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
break;
case CPUCFG_KVM_FEATURE:
ret = KVM_FEATURE_IPI;
if (kvm_pvtime_supported())
ret |= KVM_FEATURE_STEAL_TIME;
ret = vcpu->kvm->arch.pv_features & LOONGARCH_PV_FEAT_MASK;
vcpu->arch.gprs[rd] = ret;
break;
default:
......@@ -127,6 +125,14 @@ static int kvm_handle_csr(struct kvm_vcpu *vcpu, larch_inst inst)
rj = inst.reg2csr_format.rj;
csrid = inst.reg2csr_format.csr;
if (csrid >= LOONGARCH_CSR_PERFCTRL0 && csrid <= vcpu->arch.max_pmu_csrid) {
if (kvm_guest_has_pmu(&vcpu->arch)) {
vcpu->arch.pc -= 4;
kvm_make_request(KVM_REQ_PMU, vcpu);
return EMULATE_DONE;
}
}
/* Process CSR ops */
switch (rj) {
case 0: /* process csrrd */
......@@ -697,25 +703,22 @@ static long kvm_save_notify(struct kvm_vcpu *vcpu)
id = kvm_read_reg(vcpu, LOONGARCH_GPR_A1);
data = kvm_read_reg(vcpu, LOONGARCH_GPR_A2);
switch (id) {
case KVM_FEATURE_STEAL_TIME:
if (!kvm_pvtime_supported())
return KVM_HCALL_INVALID_CODE;
case BIT(KVM_FEATURE_STEAL_TIME):
if (data & ~(KVM_STEAL_PHYS_MASK | KVM_STEAL_PHYS_VALID))
return KVM_HCALL_INVALID_PARAMETER;
vcpu->arch.st.guest_addr = data;
if (!(data & KVM_STEAL_PHYS_VALID))
break;
return 0;
vcpu->arch.st.last_steal = current->sched_info.run_delay;
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
break;
return 0;
default:
break;
return KVM_HCALL_INVALID_CODE;
};
return 0;
return KVM_HCALL_INVALID_CODE;
};
/*
......@@ -748,6 +751,14 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
}
static int kvm_handle_lbt_disabled(struct kvm_vcpu *vcpu)
{
if (kvm_own_lbt(vcpu))
kvm_queue_exception(vcpu, EXCCODE_INE, 0);
return RESUME_GUEST;
}
static int kvm_send_pv_ipi(struct kvm_vcpu *vcpu)
{
unsigned int min, cpu, i;
......@@ -781,19 +792,21 @@ static int kvm_send_pv_ipi(struct kvm_vcpu *vcpu)
*/
static void kvm_handle_service(struct kvm_vcpu *vcpu)
{
long ret = KVM_HCALL_INVALID_CODE;
unsigned long func = kvm_read_reg(vcpu, LOONGARCH_GPR_A0);
long ret;
switch (func) {
case KVM_HCALL_FUNC_IPI:
if (kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_IPI)) {
kvm_send_pv_ipi(vcpu);
ret = KVM_HCALL_SUCCESS;
}
break;
case KVM_HCALL_FUNC_NOTIFY:
if (kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME))
ret = kvm_save_notify(vcpu);
break;
default:
ret = KVM_HCALL_INVALID_CODE;
break;
}
......@@ -865,6 +878,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = {
[EXCCODE_FPDIS] = kvm_handle_fpu_disabled,
[EXCCODE_LSXDIS] = kvm_handle_lsx_disabled,
[EXCCODE_LASXDIS] = kvm_handle_lasx_disabled,
[EXCCODE_BTDIS] = kvm_handle_lbt_disabled,
[EXCCODE_GSPR] = kvm_handle_gspr,
[EXCCODE_HVC] = kvm_handle_hypercall,
};
......
......@@ -6,6 +6,7 @@
#include <linux/kvm_host.h>
#include <linux/entry-kvm.h>
#include <asm/fpu.h>
#include <asm/lbt.h>
#include <asm/loongarch.h>
#include <asm/setup.h>
#include <asm/time.h>
......@@ -31,6 +32,126 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
sizeof(kvm_vcpu_stats_desc),
};
static inline void kvm_save_host_pmu(struct kvm_vcpu *vcpu)
{
struct kvm_context *context;
context = this_cpu_ptr(vcpu->kvm->arch.vmcs);
context->perf_cntr[0] = read_csr_perfcntr0();
context->perf_cntr[1] = read_csr_perfcntr1();
context->perf_cntr[2] = read_csr_perfcntr2();
context->perf_cntr[3] = read_csr_perfcntr3();
context->perf_ctrl[0] = write_csr_perfctrl0(0);
context->perf_ctrl[1] = write_csr_perfctrl1(0);
context->perf_ctrl[2] = write_csr_perfctrl2(0);
context->perf_ctrl[3] = write_csr_perfctrl3(0);
}
static inline void kvm_restore_host_pmu(struct kvm_vcpu *vcpu)
{
struct kvm_context *context;
context = this_cpu_ptr(vcpu->kvm->arch.vmcs);
write_csr_perfcntr0(context->perf_cntr[0]);
write_csr_perfcntr1(context->perf_cntr[1]);
write_csr_perfcntr2(context->perf_cntr[2]);
write_csr_perfcntr3(context->perf_cntr[3]);
write_csr_perfctrl0(context->perf_ctrl[0]);
write_csr_perfctrl1(context->perf_ctrl[1]);
write_csr_perfctrl2(context->perf_ctrl[2]);
write_csr_perfctrl3(context->perf_ctrl[3]);
}
static inline void kvm_save_guest_pmu(struct kvm_vcpu *vcpu)
{
struct loongarch_csrs *csr = vcpu->arch.csr;
kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0);
kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1);
kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2);
kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3);
kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0);
kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1);
kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2);
kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3);
}
static inline void kvm_restore_guest_pmu(struct kvm_vcpu *vcpu)
{
struct loongarch_csrs *csr = vcpu->arch.csr;
kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0);
kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1);
kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2);
kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3);
kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0);
kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1);
kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2);
kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3);
}
static int kvm_own_pmu(struct kvm_vcpu *vcpu)
{
unsigned long val;
if (!kvm_guest_has_pmu(&vcpu->arch))
return -EINVAL;
kvm_save_host_pmu(vcpu);
/* Set PM0-PM(num) to guest */
val = read_csr_gcfg() & ~CSR_GCFG_GPERF;
val |= (kvm_get_pmu_num(&vcpu->arch) + 1) << CSR_GCFG_GPERF_SHIFT;
write_csr_gcfg(val);
kvm_restore_guest_pmu(vcpu);
return 0;
}
static void kvm_lose_pmu(struct kvm_vcpu *vcpu)
{
unsigned long val;
struct loongarch_csrs *csr = vcpu->arch.csr;
if (!(vcpu->arch.aux_inuse & KVM_LARCH_PMU))
return;
kvm_save_guest_pmu(vcpu);
/* Disable pmu access from guest */
write_csr_gcfg(read_csr_gcfg() & ~CSR_GCFG_GPERF);
/*
* Clear KVM_LARCH_PMU if the guest is not using PMU CSRs when
* exiting the guest, so that the next time trap into the guest.
* We don't need to deal with PMU CSRs contexts.
*/
val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0);
val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1);
val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2);
val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3);
if (!(val & KVM_PMU_EVENT_ENABLED))
vcpu->arch.aux_inuse &= ~KVM_LARCH_PMU;
kvm_restore_host_pmu(vcpu);
}
static void kvm_restore_pmu(struct kvm_vcpu *vcpu)
{
if ((vcpu->arch.aux_inuse & KVM_LARCH_PMU))
kvm_make_request(KVM_REQ_PMU, vcpu);
}
static void kvm_check_pmu(struct kvm_vcpu *vcpu)
{
if (kvm_check_request(KVM_REQ_PMU, vcpu)) {
kvm_own_pmu(vcpu);
vcpu->arch.aux_inuse |= KVM_LARCH_PMU;
}
}
static void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
{
u32 version;
......@@ -158,6 +279,7 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu)
/* Make sure the vcpu mode has been written */
smp_store_mb(vcpu->mode, IN_GUEST_MODE);
kvm_check_vpid(vcpu);
kvm_check_pmu(vcpu);
/*
* Called after function kvm_check_vpid()
......@@ -195,6 +317,8 @@ static int kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
/* Set a default exit reason */
run->exit_reason = KVM_EXIT_UNKNOWN;
kvm_lose_pmu(vcpu);
guest_timing_exit_irqoff();
guest_state_exit_irqoff();
local_irq_enable();
......@@ -468,6 +592,22 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
kvm_write_sw_gcsr(csr, id, val);
/*
* After modifying the PMU CSR register value of the vcpu.
* If the PMU CSRs are used, we need to set KVM_REQ_PMU.
*/
if (id >= LOONGARCH_CSR_PERFCTRL0 && id <= LOONGARCH_CSR_PERFCNTR3) {
unsigned long val;
val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0) |
kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1) |
kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2) |
kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3);
if (val & KVM_PMU_EVENT_ENABLED)
kvm_make_request(KVM_REQ_PMU, vcpu);
}
return ret;
}
......@@ -497,6 +637,12 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v)
*v |= CPUCFG2_LSX;
if (cpu_has_lasx)
*v |= CPUCFG2_LASX;
if (cpu_has_lbt_x86)
*v |= CPUCFG2_X86BT;
if (cpu_has_lbt_arm)
*v |= CPUCFG2_ARMBT;
if (cpu_has_lbt_mips)
*v |= CPUCFG2_MIPSBT;
return 0;
case LOONGARCH_CPUCFG3:
......@@ -506,6 +652,12 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v)
case LOONGARCH_CPUCFG5:
*v = GENMASK(31, 0);
return 0;
case LOONGARCH_CPUCFG6:
if (cpu_has_pmp)
*v = GENMASK(14, 0);
else
*v = 0;
return 0;
case LOONGARCH_CPUCFG16:
*v = GENMASK(16, 0);
return 0;
......@@ -550,6 +702,17 @@ static int kvm_check_cpucfg(int id, u64 val)
/* LASX architecturally implies LSX and FP but val does not satisfy that */
return -EINVAL;
return 0;
case LOONGARCH_CPUCFG6:
if (val & CPUCFG6_PMP) {
u32 host = read_cpucfg(LOONGARCH_CPUCFG6);
if ((val & CPUCFG6_PMBITS) != (host & CPUCFG6_PMBITS))
return -EINVAL;
if ((val & CPUCFG6_PMNUM) > (host & CPUCFG6_PMNUM))
return -EINVAL;
if ((val & CPUCFG6_UPM) && !(host & CPUCFG6_UPM))
return -EINVAL;
}
return 0;
default:
/*
* Values for the other CPUCFG IDs are not being further validated
......@@ -577,6 +740,34 @@ static int kvm_get_one_reg(struct kvm_vcpu *vcpu,
else
ret = -EINVAL;
break;
case KVM_REG_LOONGARCH_LBT:
if (!kvm_guest_has_lbt(&vcpu->arch))
return -ENXIO;
switch (reg->id) {
case KVM_REG_LOONGARCH_LBT_SCR0:
*v = vcpu->arch.lbt.scr0;
break;
case KVM_REG_LOONGARCH_LBT_SCR1:
*v = vcpu->arch.lbt.scr1;
break;
case KVM_REG_LOONGARCH_LBT_SCR2:
*v = vcpu->arch.lbt.scr2;
break;
case KVM_REG_LOONGARCH_LBT_SCR3:
*v = vcpu->arch.lbt.scr3;
break;
case KVM_REG_LOONGARCH_LBT_EFLAGS:
*v = vcpu->arch.lbt.eflags;
break;
case KVM_REG_LOONGARCH_LBT_FTOP:
*v = vcpu->arch.fpu.ftop;
break;
default:
ret = -EINVAL;
break;
}
break;
case KVM_REG_LOONGARCH_KVM:
switch (reg->id) {
case KVM_REG_LOONGARCH_COUNTER:
......@@ -635,6 +826,37 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu,
if (ret)
break;
vcpu->arch.cpucfg[id] = (u32)v;
if (id == LOONGARCH_CPUCFG6)
vcpu->arch.max_pmu_csrid =
LOONGARCH_CSR_PERFCTRL0 + 2 * kvm_get_pmu_num(&vcpu->arch) + 1;
break;
case KVM_REG_LOONGARCH_LBT:
if (!kvm_guest_has_lbt(&vcpu->arch))
return -ENXIO;
switch (reg->id) {
case KVM_REG_LOONGARCH_LBT_SCR0:
vcpu->arch.lbt.scr0 = v;
break;
case KVM_REG_LOONGARCH_LBT_SCR1:
vcpu->arch.lbt.scr1 = v;
break;
case KVM_REG_LOONGARCH_LBT_SCR2:
vcpu->arch.lbt.scr2 = v;
break;
case KVM_REG_LOONGARCH_LBT_SCR3:
vcpu->arch.lbt.scr3 = v;
break;
case KVM_REG_LOONGARCH_LBT_EFLAGS:
vcpu->arch.lbt.eflags = v;
break;
case KVM_REG_LOONGARCH_LBT_FTOP:
vcpu->arch.fpu.ftop = v;
break;
default:
ret = -EINVAL;
break;
}
break;
case KVM_REG_LOONGARCH_KVM:
switch (reg->id) {
......@@ -728,7 +950,10 @@ static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
switch (attr->attr) {
case 2:
case LOONGARCH_CPUCFG2:
case LOONGARCH_CPUCFG6:
return 0;
case CPUCFG_KVM_FEATURE:
return 0;
default:
return -ENXIO;
......@@ -740,8 +965,8 @@ static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu,
static int kvm_loongarch_pvtime_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
if (!kvm_pvtime_supported() ||
attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME)
|| attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
return -ENXIO;
return 0;
......@@ -773,9 +998,18 @@ static int kvm_loongarch_cpucfg_get_attr(struct kvm_vcpu *vcpu,
uint64_t val;
uint64_t __user *uaddr = (uint64_t __user *)attr->addr;
switch (attr->attr) {
case 0 ... (KVM_MAX_CPUCFG_REGS - 1):
ret = _kvm_get_cpucfg_mask(attr->attr, &val);
if (ret)
return ret;
break;
case CPUCFG_KVM_FEATURE:
val = vcpu->kvm->arch.pv_features & LOONGARCH_PV_FEAT_MASK;
break;
default:
return -ENXIO;
}
put_user(val, uaddr);
......@@ -788,8 +1022,8 @@ static int kvm_loongarch_pvtime_get_attr(struct kvm_vcpu *vcpu,
u64 gpa;
u64 __user *user = (u64 __user *)attr->addr;
if (!kvm_pvtime_supported() ||
attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME)
|| attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
return -ENXIO;
gpa = vcpu->arch.st.guest_addr;
......@@ -821,7 +1055,28 @@ static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu,
static int kvm_loongarch_cpucfg_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
u64 val, valid;
u64 __user *user = (u64 __user *)attr->addr;
struct kvm *kvm = vcpu->kvm;
switch (attr->attr) {
case CPUCFG_KVM_FEATURE:
if (get_user(val, user))
return -EFAULT;
valid = LOONGARCH_PV_FEAT_MASK;
if (val & ~valid)
return -EINVAL;
/* All vCPUs need set the same PV features */
if ((kvm->arch.pv_features & LOONGARCH_PV_FEAT_UPDATED)
&& ((kvm->arch.pv_features & valid) != val))
return -EINVAL;
kvm->arch.pv_features = val | LOONGARCH_PV_FEAT_UPDATED;
return 0;
default:
return -ENXIO;
}
}
static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu,
......@@ -831,8 +1086,8 @@ static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu,
u64 gpa, __user *user = (u64 __user *)attr->addr;
struct kvm *kvm = vcpu->kvm;
if (!kvm_pvtime_supported() ||
attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME)
|| attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
return -ENXIO;
if (get_user(gpa, user))
......@@ -977,12 +1232,66 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return 0;
}
#ifdef CONFIG_CPU_HAS_LBT
int kvm_own_lbt(struct kvm_vcpu *vcpu)
{
if (!kvm_guest_has_lbt(&vcpu->arch))
return -EINVAL;
preempt_disable();
set_csr_euen(CSR_EUEN_LBTEN);
_restore_lbt(&vcpu->arch.lbt);
vcpu->arch.aux_inuse |= KVM_LARCH_LBT;
preempt_enable();
return 0;
}
static void kvm_lose_lbt(struct kvm_vcpu *vcpu)
{
preempt_disable();
if (vcpu->arch.aux_inuse & KVM_LARCH_LBT) {
_save_lbt(&vcpu->arch.lbt);
clear_csr_euen(CSR_EUEN_LBTEN);
vcpu->arch.aux_inuse &= ~KVM_LARCH_LBT;
}
preempt_enable();
}
static void kvm_check_fcsr(struct kvm_vcpu *vcpu, unsigned long fcsr)
{
/*
* If TM is enabled, top register save/restore will
* cause lbt exception, here enable lbt in advance
*/
if (fcsr & FPU_CSR_TM)
kvm_own_lbt(vcpu);
}
static void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
if (vcpu->arch.aux_inuse & KVM_LARCH_LBT)
return;
kvm_check_fcsr(vcpu, read_fcsr(LOONGARCH_FCSR0));
}
}
#else
static inline void kvm_lose_lbt(struct kvm_vcpu *vcpu) { }
static inline void kvm_check_fcsr(struct kvm_vcpu *vcpu, unsigned long fcsr) { }
static inline void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) { }
#endif
/* Enable FPU and restore context */
void kvm_own_fpu(struct kvm_vcpu *vcpu)
{
preempt_disable();
/* Enable FPU */
/*
* Enable FPU for guest
* Set FR and FRE according to guest context
*/
kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
set_csr_euen(CSR_EUEN_FPEN);
kvm_restore_fpu(&vcpu->arch.fpu);
......@@ -1002,6 +1311,7 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
preempt_disable();
/* Enable LSX for guest */
kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN);
switch (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
case KVM_LARCH_FPU:
......@@ -1036,6 +1346,7 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu)
preempt_disable();
kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) {
case KVM_LARCH_LSX:
......@@ -1067,6 +1378,7 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu)
{
preempt_disable();
kvm_check_fcsr_alive(vcpu);
if (vcpu->arch.aux_inuse & KVM_LARCH_LASX) {
kvm_save_lasx(&vcpu->arch.fpu);
vcpu->arch.aux_inuse &= ~(KVM_LARCH_LSX | KVM_LARCH_FPU | KVM_LARCH_LASX);
......@@ -1089,6 +1401,7 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu)
/* Disable FPU */
clear_csr_euen(CSR_EUEN_FPEN);
}
kvm_lose_lbt(vcpu);
preempt_enable();
}
......@@ -1235,6 +1548,9 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT);
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
/* Restore hardware PMU CSRs */
kvm_restore_pmu(vcpu);
/* Don't bother restoring registers multiple times unless necessary */
if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE)
return 0;
......
......@@ -5,6 +5,7 @@
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_vcpu.h>
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
KVM_GENERIC_VM_STATS(),
......@@ -39,6 +40,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
spin_lock_init(&kvm->arch.phyid_map_lock);
kvm_init_vmcs(kvm);
/* Enable all PV features by default */
kvm->arch.pv_features = BIT(KVM_FEATURE_IPI);
if (kvm_pvtime_supported())
kvm->arch.pv_features |= BIT(KVM_FEATURE_STEAL_TIME);
kvm->arch.gpa_size = BIT(cpu_vabits - 1);
kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1;
kvm->arch.invalid_ptes[0] = 0;
......@@ -99,7 +106,67 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
return r;
}
static int kvm_vm_feature_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
switch (attr->attr) {
case KVM_LOONGARCH_VM_FEAT_LSX:
if (cpu_has_lsx)
return 0;
return -ENXIO;
case KVM_LOONGARCH_VM_FEAT_LASX:
if (cpu_has_lasx)
return 0;
return -ENXIO;
case KVM_LOONGARCH_VM_FEAT_X86BT:
if (cpu_has_lbt_x86)
return 0;
return -ENXIO;
case KVM_LOONGARCH_VM_FEAT_ARMBT:
if (cpu_has_lbt_arm)
return 0;
return -ENXIO;
case KVM_LOONGARCH_VM_FEAT_MIPSBT:
if (cpu_has_lbt_mips)
return 0;
return -ENXIO;
case KVM_LOONGARCH_VM_FEAT_PMU:
if (cpu_has_pmp)
return 0;
return -ENXIO;
case KVM_LOONGARCH_VM_FEAT_PV_IPI:
return 0;
case KVM_LOONGARCH_VM_FEAT_PV_STEALTIME:
if (kvm_pvtime_supported())
return 0;
return -ENXIO;
default:
return -ENXIO;
}
}
static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
switch (attr->group) {
case KVM_LOONGARCH_VM_FEAT_CTRL:
return kvm_vm_feature_has_attr(kvm, attr);
default:
return -ENXIO;
}
}
int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
void __user *argp = (void __user *)arg;
struct kvm *kvm = filp->private_data;
struct kvm_device_attr attr;
switch (ioctl) {
case KVM_HAS_DEVICE_ATTR:
if (copy_from_user(&attr, argp, sizeof(attr)))
return -EFAULT;
return kvm_vm_has_attr(kvm, &attr);
default:
return -ENOIOCTLCMD;
}
}
......@@ -10,6 +10,7 @@
#define __KVM_VCPU_RISCV_PMU_H
#include <linux/perf/riscv_pmu.h>
#include <asm/kvm_vcpu_insn.h>
#include <asm/sbi.h>
#ifdef CONFIG_RISCV_PMU_SBI
......@@ -64,11 +65,11 @@ struct kvm_pmu {
#if defined(CONFIG_32BIT)
#define KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS \
{.base = CSR_CYCLEH, .count = 31, .func = kvm_riscv_vcpu_pmu_read_hpm }, \
{.base = CSR_CYCLE, .count = 31, .func = kvm_riscv_vcpu_pmu_read_hpm },
{.base = CSR_CYCLEH, .count = 32, .func = kvm_riscv_vcpu_pmu_read_hpm }, \
{.base = CSR_CYCLE, .count = 32, .func = kvm_riscv_vcpu_pmu_read_hpm },
#else
#define KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS \
{.base = CSR_CYCLE, .count = 31, .func = kvm_riscv_vcpu_pmu_read_hpm },
{.base = CSR_CYCLE, .count = 32, .func = kvm_riscv_vcpu_pmu_read_hpm },
#endif
int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid);
......@@ -104,8 +105,20 @@ void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu);
struct kvm_pmu {
};
static inline int kvm_riscv_vcpu_pmu_read_legacy(struct kvm_vcpu *vcpu, unsigned int csr_num,
unsigned long *val, unsigned long new_val,
unsigned long wr_mask)
{
if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
*val = 0;
return KVM_INSN_CONTINUE_NEXT_SEPC;
} else {
return KVM_INSN_ILLEGAL_TRAP;
}
}
#define KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS \
{.base = 0, .count = 0, .func = NULL },
{.base = CSR_CYCLE, .count = 3, .func = kvm_riscv_vcpu_pmu_read_legacy },
static inline void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) {}
static inline int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
......
......@@ -391,19 +391,9 @@ int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
if (kvpmu->sdata) {
if (kvpmu->snapshot_addr != INVALID_GPA) {
memset(kvpmu->sdata, 0, snapshot_area_size);
kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr,
kvpmu->sdata, snapshot_area_size);
} else {
pr_warn("snapshot address invalid\n");
}
kfree(kvpmu->sdata);
kvpmu->sdata = NULL;
}
kvpmu->snapshot_addr = INVALID_GPA;
}
......
......@@ -127,8 +127,8 @@ void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
run->riscv_sbi.args[3] = cp->a3;
run->riscv_sbi.args[4] = cp->a4;
run->riscv_sbi.args[5] = cp->a5;
run->riscv_sbi.ret[0] = cp->a0;
run->riscv_sbi.ret[1] = cp->a1;
run->riscv_sbi.ret[0] = SBI_ERR_NOT_SUPPORTED;
run->riscv_sbi.ret[1] = 0;
}
void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
......
......@@ -152,6 +152,7 @@ TEST_GEN_PROGS_x86_64 += pre_fault_memory_test
TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test
TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs
TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases
TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
......@@ -163,6 +164,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress
TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access
TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3
TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
TEST_GEN_PROGS_aarch64 += arch_timer
TEST_GEN_PROGS_aarch64 += demand_paging_test
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* arch_timer_edge_cases.c - Tests the aarch64 timer IRQ functionality.
*
* The test validates some edge cases related to the arch-timer:
* - timers above the max TVAL value.
* - timers in the past
* - moving counters ahead and behind pending timers.
* - reprograming timers.
* - timers fired multiple times.
* - masking/unmasking using the timer control mask.
*
* Copyright (c) 2021, Google LLC.
*/
#define _GNU_SOURCE
#include <pthread.h>
#include <sys/sysinfo.h>
#include "arch_timer.h"
#include "gic.h"
#include "vgic.h"
static const uint64_t CVAL_MAX = ~0ULL;
/* tval is a signed 32-bit int. */
static const int32_t TVAL_MAX = INT32_MAX;
static const int32_t TVAL_MIN = INT32_MIN;
/* After how much time we say there is no IRQ. */
static const uint32_t TIMEOUT_NO_IRQ_US = 50000;
/* A nice counter value to use as the starting one for most tests. */
static const uint64_t DEF_CNT = (CVAL_MAX / 2);
/* Number of runs. */
static const uint32_t NR_TEST_ITERS_DEF = 5;
/* Default wait test time in ms. */
static const uint32_t WAIT_TEST_MS = 10;
/* Default "long" wait test time in ms. */
static const uint32_t LONG_WAIT_TEST_MS = 100;
/* Shared with IRQ handler. */
struct test_vcpu_shared_data {
atomic_t handled;
atomic_t spurious;
} shared_data;
struct test_args {
/* Virtual or physical timer and counter tests. */
enum arch_timer timer;
/* Delay used for most timer tests. */
uint64_t wait_ms;
/* Delay used in the test_long_timer_delays test. */
uint64_t long_wait_ms;
/* Number of iterations. */
int iterations;
/* Whether to test the physical timer. */
bool test_physical;
/* Whether to test the virtual timer. */
bool test_virtual;
};
struct test_args test_args = {
.wait_ms = WAIT_TEST_MS,
.long_wait_ms = LONG_WAIT_TEST_MS,
.iterations = NR_TEST_ITERS_DEF,
.test_physical = true,
.test_virtual = true,
};
static int vtimer_irq, ptimer_irq;
enum sync_cmd {
SET_COUNTER_VALUE,
USERSPACE_USLEEP,
USERSPACE_SCHED_YIELD,
USERSPACE_MIGRATE_SELF,
NO_USERSPACE_CMD,
};
typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec);
static void sleep_poll(enum arch_timer timer, uint64_t usec);
static void sleep_sched_poll(enum arch_timer timer, uint64_t usec);
static void sleep_in_userspace(enum arch_timer timer, uint64_t usec);
static void sleep_migrate(enum arch_timer timer, uint64_t usec);
sleep_method_t sleep_method[] = {
sleep_poll,
sleep_sched_poll,
sleep_migrate,
sleep_in_userspace,
};
typedef void (*irq_wait_method_t)(void);
static void wait_for_non_spurious_irq(void);
static void wait_poll_for_irq(void);
static void wait_sched_poll_for_irq(void);
static void wait_migrate_poll_for_irq(void);
irq_wait_method_t irq_wait_method[] = {
wait_for_non_spurious_irq,
wait_poll_for_irq,
wait_sched_poll_for_irq,
wait_migrate_poll_for_irq,
};
enum timer_view {
TIMER_CVAL,
TIMER_TVAL,
};
static void assert_irqs_handled(uint32_t n)
{
int h = atomic_read(&shared_data.handled);
__GUEST_ASSERT(h == n, "Handled %d IRQS but expected %d", h, n);
}
static void userspace_cmd(uint64_t cmd)
{
GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0);
}
static void userspace_migrate_vcpu(void)
{
userspace_cmd(USERSPACE_MIGRATE_SELF);
}
static void userspace_sleep(uint64_t usecs)
{
GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0);
}
static void set_counter(enum arch_timer timer, uint64_t counter)
{
GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0);
}
static void guest_irq_handler(struct ex_regs *regs)
{
unsigned int intid = gic_get_and_ack_irq();
enum arch_timer timer;
uint64_t cnt, cval;
uint32_t ctl;
bool timer_condition, istatus;
if (intid == IAR_SPURIOUS) {
atomic_inc(&shared_data.spurious);
goto out;
}
if (intid == ptimer_irq)
timer = PHYSICAL;
else if (intid == vtimer_irq)
timer = VIRTUAL;
else
goto out;
ctl = timer_get_ctl(timer);
cval = timer_get_cval(timer);
cnt = timer_get_cntct(timer);
timer_condition = cnt >= cval;
istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE);
GUEST_ASSERT_EQ(timer_condition, istatus);
/* Disable and mask the timer. */
timer_set_ctl(timer, CTL_IMASK);
atomic_inc(&shared_data.handled);
out:
gic_set_eoi(intid);
}
static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles,
uint32_t ctl)
{
atomic_set(&shared_data.handled, 0);
atomic_set(&shared_data.spurious, 0);
timer_set_cval(timer, cval_cycles);
timer_set_ctl(timer, ctl);
}
static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles,
uint32_t ctl)
{
atomic_set(&shared_data.handled, 0);
atomic_set(&shared_data.spurious, 0);
timer_set_ctl(timer, ctl);
timer_set_tval(timer, tval_cycles);
}
static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl,
enum timer_view tv)
{
switch (tv) {
case TIMER_CVAL:
set_cval_irq(timer, xval, ctl);
break;
case TIMER_TVAL:
set_tval_irq(timer, xval, ctl);
break;
default:
GUEST_FAIL("Could not get timer %d", timer);
}
}
/*
* Note that this can theoretically hang forever, so we rely on having
* a timeout mechanism in the "runner", like:
* tools/testing/selftests/kselftest/runner.sh.
*/
static void wait_for_non_spurious_irq(void)
{
int h;
local_irq_disable();
for (h = atomic_read(&shared_data.handled); h == atomic_read(&shared_data.handled);) {
wfi();
local_irq_enable();
isb(); /* handle IRQ */
local_irq_disable();
}
}
/*
* Wait for an non-spurious IRQ by polling in the guest or in
* userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
*
* Note that this can theoretically hang forever, so we rely on having
* a timeout mechanism in the "runner", like:
* tools/testing/selftests/kselftest/runner.sh.
*/
static void poll_for_non_spurious_irq(enum sync_cmd usp_cmd)
{
int h;
local_irq_disable();
h = atomic_read(&shared_data.handled);
local_irq_enable();
while (h == atomic_read(&shared_data.handled)) {
if (usp_cmd == NO_USERSPACE_CMD)
cpu_relax();
else
userspace_cmd(usp_cmd);
}
local_irq_disable();
}
static void wait_poll_for_irq(void)
{
poll_for_non_spurious_irq(NO_USERSPACE_CMD);
}
static void wait_sched_poll_for_irq(void)
{
poll_for_non_spurious_irq(USERSPACE_SCHED_YIELD);
}
static void wait_migrate_poll_for_irq(void)
{
poll_for_non_spurious_irq(USERSPACE_MIGRATE_SELF);
}
/*
* Sleep for usec microseconds by polling in the guest or in
* userspace (e.g. userspace_cmd=USERSPACE_SCHEDULE).
*/
static void guest_poll(enum arch_timer test_timer, uint64_t usec,
enum sync_cmd usp_cmd)
{
uint64_t cycles = usec_to_cycles(usec);
/* Whichever timer we are testing with, sleep with the other. */
enum arch_timer sleep_timer = 1 - test_timer;
uint64_t start = timer_get_cntct(sleep_timer);
while ((timer_get_cntct(sleep_timer) - start) < cycles) {
if (usp_cmd == NO_USERSPACE_CMD)
cpu_relax();
else
userspace_cmd(usp_cmd);
}
}
static void sleep_poll(enum arch_timer timer, uint64_t usec)
{
guest_poll(timer, usec, NO_USERSPACE_CMD);
}
static void sleep_sched_poll(enum arch_timer timer, uint64_t usec)
{
guest_poll(timer, usec, USERSPACE_SCHED_YIELD);
}
static void sleep_migrate(enum arch_timer timer, uint64_t usec)
{
guest_poll(timer, usec, USERSPACE_MIGRATE_SELF);
}
static void sleep_in_userspace(enum arch_timer timer, uint64_t usec)
{
userspace_sleep(usec);
}
/*
* Reset the timer state to some nice values like the counter not being close
* to the edge, and the control register masked and disabled.
*/
static void reset_timer_state(enum arch_timer timer, uint64_t cnt)
{
set_counter(timer, cnt);
timer_set_ctl(timer, CTL_IMASK);
}
static void test_timer_xval(enum arch_timer timer, uint64_t xval,
enum timer_view tv, irq_wait_method_t wm, bool reset_state,
uint64_t reset_cnt)
{
local_irq_disable();
if (reset_state)
reset_timer_state(timer, reset_cnt);
set_xval_irq(timer, xval, CTL_ENABLE, tv);
/* This method re-enables IRQs to handle the one we're looking for. */
wm();
assert_irqs_handled(1);
local_irq_enable();
}
/*
* The test_timer_* functions will program the timer, wait for it, and assert
* the firing of the correct IRQ.
*
* These functions don't have a timeout and return as soon as they receive an
* IRQ. They can hang (forever), so we rely on having a timeout mechanism in
* the "runner", like: tools/testing/selftests/kselftest/runner.sh.
*/
static void test_timer_cval(enum arch_timer timer, uint64_t cval,
irq_wait_method_t wm, bool reset_state,
uint64_t reset_cnt)
{
test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt);
}
static void test_timer_tval(enum arch_timer timer, int32_t tval,
irq_wait_method_t wm, bool reset_state,
uint64_t reset_cnt)
{
test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state,
reset_cnt);
}
static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval,
uint64_t usec, enum timer_view timer_view,
sleep_method_t guest_sleep)
{
local_irq_disable();
set_xval_irq(timer, xval, CTL_ENABLE | CTL_IMASK, timer_view);
guest_sleep(timer, usec);
local_irq_enable();
isb();
/* Assume success (no IRQ) after waiting usec microseconds */
assert_irqs_handled(0);
}
static void test_cval_no_irq(enum arch_timer timer, uint64_t cval,
uint64_t usec, sleep_method_t wm)
{
test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm);
}
static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec,
sleep_method_t wm)
{
/* tval will be cast to an int32_t in test_xval_check_no_irq */
test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm);
}
/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */
static void test_timer_control_mask_then_unmask(enum arch_timer timer)
{
reset_timer_state(timer, DEF_CNT);
set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
/* Unmask the timer, and then get an IRQ. */
local_irq_disable();
timer_set_ctl(timer, CTL_ENABLE);
/* This method re-enables IRQs to handle the one we're looking for. */
wait_for_non_spurious_irq();
assert_irqs_handled(1);
local_irq_enable();
}
/* Check that timer control masks actually mask a timer being fired. */
static void test_timer_control_masks(enum arch_timer timer)
{
reset_timer_state(timer, DEF_CNT);
/* Local IRQs are not masked at this point. */
set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
/* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
sleep_poll(timer, TIMEOUT_NO_IRQ_US);
assert_irqs_handled(0);
timer_set_ctl(timer, CTL_IMASK);
}
static void test_fire_a_timer_multiple_times(enum arch_timer timer,
irq_wait_method_t wm, int num)
{
int i;
local_irq_disable();
reset_timer_state(timer, DEF_CNT);
set_tval_irq(timer, 0, CTL_ENABLE);
for (i = 1; i <= num; i++) {
/* This method re-enables IRQs to handle the one we're looking for. */
wm();
/* The IRQ handler masked and disabled the timer.
* Enable and unmmask it again.
*/
timer_set_ctl(timer, CTL_ENABLE);
assert_irqs_handled(i);
}
local_irq_enable();
}
static void test_timers_fired_multiple_times(enum arch_timer timer)
{
int i;
for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++)
test_fire_a_timer_multiple_times(timer, irq_wait_method[i], 10);
}
/*
* Set a timer for tval=delta_1_ms then reprogram it to
* tval=delta_2_ms. Check that we get the timer fired. There is no
* timeout for the wait: we use the wfi instruction.
*/
static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm,
int32_t delta_1_ms, int32_t delta_2_ms)
{
local_irq_disable();
reset_timer_state(timer, DEF_CNT);
/* Program the timer to DEF_CNT + delta_1_ms. */
set_tval_irq(timer, msec_to_cycles(delta_1_ms), CTL_ENABLE);
/* Reprogram the timer to DEF_CNT + delta_2_ms. */
timer_set_tval(timer, msec_to_cycles(delta_2_ms));
/* This method re-enables IRQs to handle the one we're looking for. */
wm();
/* The IRQ should arrive at DEF_CNT + delta_2_ms (or after). */
GUEST_ASSERT(timer_get_cntct(timer) >=
DEF_CNT + msec_to_cycles(delta_2_ms));
local_irq_enable();
assert_irqs_handled(1);
};
static void test_reprogram_timers(enum arch_timer timer)
{
int i;
uint64_t base_wait = test_args.wait_ms;
for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
/*
* Ensure reprogramming works whether going from a
* longer time to a shorter or vice versa.
*/
test_reprogramming_timer(timer, irq_wait_method[i], 2 * base_wait,
base_wait);
test_reprogramming_timer(timer, irq_wait_method[i], base_wait,
2 * base_wait);
}
}
static void test_basic_functionality(enum arch_timer timer)
{
int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms);
uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms);
int i;
for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
irq_wait_method_t wm = irq_wait_method[i];
test_timer_cval(timer, cval, wm, true, DEF_CNT);
test_timer_tval(timer, tval, wm, true, DEF_CNT);
}
}
/*
* This test checks basic timer behavior without actually firing timers, things
* like: the relationship between cval and tval, tval down-counting.
*/
static void timers_sanity_checks(enum arch_timer timer, bool use_sched)
{
reset_timer_state(timer, DEF_CNT);
local_irq_disable();
/* cval in the past */
timer_set_cval(timer,
timer_get_cntct(timer) -
msec_to_cycles(test_args.wait_ms));
if (use_sched)
userspace_migrate_vcpu();
GUEST_ASSERT(timer_get_tval(timer) < 0);
/* tval in the past */
timer_set_tval(timer, -1);
if (use_sched)
userspace_migrate_vcpu();
GUEST_ASSERT(timer_get_cval(timer) < timer_get_cntct(timer));
/* tval larger than TVAL_MAX. This requires programming with
* timer_set_cval instead so the value is expressible
*/
timer_set_cval(timer,
timer_get_cntct(timer) + TVAL_MAX +
msec_to_cycles(test_args.wait_ms));
if (use_sched)
userspace_migrate_vcpu();
GUEST_ASSERT(timer_get_tval(timer) <= 0);
/*
* tval larger than 2 * TVAL_MAX.
* Twice the TVAL_MAX completely loops around the TVAL.
*/
timer_set_cval(timer,
timer_get_cntct(timer) + 2ULL * TVAL_MAX +
msec_to_cycles(test_args.wait_ms));
if (use_sched)
userspace_migrate_vcpu();
GUEST_ASSERT(timer_get_tval(timer) <=
msec_to_cycles(test_args.wait_ms));
/* negative tval that rollovers from 0. */
set_counter(timer, msec_to_cycles(1));
timer_set_tval(timer, -1 * msec_to_cycles(test_args.wait_ms));
if (use_sched)
userspace_migrate_vcpu();
GUEST_ASSERT(timer_get_cval(timer) >= (CVAL_MAX - msec_to_cycles(test_args.wait_ms)));
/* tval should keep down-counting from 0 to -1. */
timer_set_tval(timer, 0);
sleep_poll(timer, 1);
GUEST_ASSERT(timer_get_tval(timer) < 0);
local_irq_enable();
/* Mask and disable any pending timer. */
timer_set_ctl(timer, CTL_IMASK);
}
static void test_timers_sanity_checks(enum arch_timer timer)
{
timers_sanity_checks(timer, false);
/* Check how KVM saves/restores these edge-case values. */
timers_sanity_checks(timer, true);
}
static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t wm)
{
local_irq_disable();
reset_timer_state(timer, DEF_CNT);
set_cval_irq(timer,
(uint64_t) TVAL_MAX +
msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE);
set_counter(timer, TVAL_MAX);
/* This method re-enables IRQs to handle the one we're looking for. */
wm();
assert_irqs_handled(1);
local_irq_enable();
}
/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */
static void test_timers_above_tval_max(enum arch_timer timer)
{
uint64_t cval;
int i;
/*
* Test that the system is not implementing cval in terms of
* tval. If that was the case, setting a cval to "cval = now
* + TVAL_MAX + wait_ms" would wrap to "cval = now +
* wait_ms", and the timer would fire immediately. Test that it
* doesn't.
*/
for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
reset_timer_state(timer, DEF_CNT);
cval = timer_get_cntct(timer) + TVAL_MAX +
msec_to_cycles(test_args.wait_ms);
test_cval_no_irq(timer, cval,
msecs_to_usecs(test_args.wait_ms) +
TIMEOUT_NO_IRQ_US, sleep_method[i]);
}
for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
/* Get the IRQ by moving the counter forward. */
test_set_cnt_after_tval_max(timer, irq_wait_method[i]);
}
}
/*
* Template function to be used by the test_move_counter_ahead_* tests. It
* sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and
* then waits for an IRQ.
*/
static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1,
uint64_t xval, uint64_t cnt_2,
irq_wait_method_t wm, enum timer_view tv)
{
local_irq_disable();
set_counter(timer, cnt_1);
timer_set_ctl(timer, CTL_IMASK);
set_xval_irq(timer, xval, CTL_ENABLE, tv);
set_counter(timer, cnt_2);
/* This method re-enables IRQs to handle the one we're looking for. */
wm();
assert_irqs_handled(1);
local_irq_enable();
}
/*
* Template function to be used by the test_move_counter_ahead_* tests. It
* sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and
* then waits for an IRQ.
*/
static void test_set_cnt_after_xval_no_irq(enum arch_timer timer,
uint64_t cnt_1, uint64_t xval,
uint64_t cnt_2,
sleep_method_t guest_sleep,
enum timer_view tv)
{
local_irq_disable();
set_counter(timer, cnt_1);
timer_set_ctl(timer, CTL_IMASK);
set_xval_irq(timer, xval, CTL_ENABLE, tv);
set_counter(timer, cnt_2);
guest_sleep(timer, TIMEOUT_NO_IRQ_US);
local_irq_enable();
isb();
/* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
assert_irqs_handled(0);
timer_set_ctl(timer, CTL_IMASK);
}
static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1,
int32_t tval, uint64_t cnt_2,
irq_wait_method_t wm)
{
test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL);
}
static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1,
uint64_t cval, uint64_t cnt_2,
irq_wait_method_t wm)
{
test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL);
}
static void test_set_cnt_after_tval_no_irq(enum arch_timer timer,
uint64_t cnt_1, int32_t tval,
uint64_t cnt_2, sleep_method_t wm)
{
test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm,
TIMER_TVAL);
}
static void test_set_cnt_after_cval_no_irq(enum arch_timer timer,
uint64_t cnt_1, uint64_t cval,
uint64_t cnt_2, sleep_method_t wm)
{
test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm,
TIMER_CVAL);
}
/* Set a timer and then move the counter ahead of it. */
static void test_move_counters_ahead_of_timers(enum arch_timer timer)
{
int i;
int32_t tval;
for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
irq_wait_method_t wm = irq_wait_method[i];
test_set_cnt_after_cval(timer, 0, DEF_CNT, DEF_CNT + 1, wm);
test_set_cnt_after_cval(timer, CVAL_MAX, 1, 2, wm);
/* Move counter ahead of negative tval. */
test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm);
test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm);
tval = TVAL_MAX;
test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1,
wm);
}
for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
sleep_method_t sm = sleep_method[i];
test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm);
}
}
/*
* Program a timer, mask it, and then change the tval or counter to cancel it.
* Unmask it and check that nothing fires.
*/
static void test_move_counters_behind_timers(enum arch_timer timer)
{
int i;
for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
sleep_method_t sm = sleep_method[i];
test_set_cnt_after_cval_no_irq(timer, DEF_CNT, DEF_CNT - 1, 0,
sm);
test_set_cnt_after_tval_no_irq(timer, DEF_CNT, -1, 0, sm);
}
}
static void test_timers_in_the_past(enum arch_timer timer)
{
int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms);
uint64_t cval;
int i;
for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
irq_wait_method_t wm = irq_wait_method[i];
/* set a timer wait_ms the past. */
cval = DEF_CNT - msec_to_cycles(test_args.wait_ms);
test_timer_cval(timer, cval, wm, true, DEF_CNT);
test_timer_tval(timer, tval, wm, true, DEF_CNT);
/* Set a timer to counter=0 (in the past) */
test_timer_cval(timer, 0, wm, true, DEF_CNT);
/* Set a time for tval=0 (now) */
test_timer_tval(timer, 0, wm, true, DEF_CNT);
/* Set a timer to as far in the past as possible */
test_timer_tval(timer, TVAL_MIN, wm, true, DEF_CNT);
}
/*
* Set the counter to wait_ms, and a tval to -wait_ms. There should be no
* IRQ as that tval means cval=CVAL_MAX-wait_ms.
*/
for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
sleep_method_t sm = sleep_method[i];
set_counter(timer, msec_to_cycles(test_args.wait_ms));
test_tval_no_irq(timer, tval, TIMEOUT_NO_IRQ_US, sm);
}
}
static void test_long_timer_delays(enum arch_timer timer)
{
int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms);
uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms);
int i;
for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
irq_wait_method_t wm = irq_wait_method[i];
test_timer_cval(timer, cval, wm, true, DEF_CNT);
test_timer_tval(timer, tval, wm, true, DEF_CNT);
}
}
static void guest_run_iteration(enum arch_timer timer)
{
test_basic_functionality(timer);
test_timers_sanity_checks(timer);
test_timers_above_tval_max(timer);
test_timers_in_the_past(timer);
test_move_counters_ahead_of_timers(timer);
test_move_counters_behind_timers(timer);
test_reprogram_timers(timer);
test_timers_fired_multiple_times(timer);
test_timer_control_mask_then_unmask(timer);
test_timer_control_masks(timer);
}
static void guest_code(enum arch_timer timer)
{
int i;
local_irq_disable();
gic_init(GIC_V3, 1);
timer_set_ctl(VIRTUAL, CTL_IMASK);
timer_set_ctl(PHYSICAL, CTL_IMASK);
gic_irq_enable(vtimer_irq);
gic_irq_enable(ptimer_irq);
local_irq_enable();
for (i = 0; i < test_args.iterations; i++) {
GUEST_SYNC(i);
guest_run_iteration(timer);
}
test_long_timer_delays(timer);
GUEST_DONE();
}
static uint32_t next_pcpu(void)
{
uint32_t max = get_nprocs();
uint32_t cur = sched_getcpu();
uint32_t next = cur;
cpu_set_t cpuset;
TEST_ASSERT(max > 1, "Need at least two physical cpus");
sched_getaffinity(0, sizeof(cpuset), &cpuset);
do {
next = (next + 1) % CPU_SETSIZE;
} while (!CPU_ISSET(next, &cpuset));
return next;
}
static void migrate_self(uint32_t new_pcpu)
{
int ret;
cpu_set_t cpuset;
pthread_t thread;
thread = pthread_self();
CPU_ZERO(&cpuset);
CPU_SET(new_pcpu, &cpuset);
pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu);
ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n",
new_pcpu, ret);
}
static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt,
enum arch_timer timer)
{
if (timer == PHYSICAL)
vcpu_set_reg(vcpu, KVM_REG_ARM_PTIMER_CNT, cnt);
else
vcpu_set_reg(vcpu, KVM_REG_ARM_TIMER_CNT, cnt);
}
static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
{
enum sync_cmd cmd = uc->args[1];
uint64_t val = uc->args[2];
enum arch_timer timer = uc->args[3];
switch (cmd) {
case SET_COUNTER_VALUE:
kvm_set_cntxct(vcpu, val, timer);
break;
case USERSPACE_USLEEP:
usleep(val);
break;
case USERSPACE_SCHED_YIELD:
sched_yield();
break;
case USERSPACE_MIGRATE_SELF:
migrate_self(next_pcpu());
break;
default:
break;
}
}
static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
struct ucall uc;
/* Start on CPU 0 */
migrate_self(0);
while (true) {
vcpu_run(vcpu);
switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
handle_sync(vcpu, &uc);
break;
case UCALL_DONE:
goto out;
case UCALL_ABORT:
REPORT_GUEST_ASSERT(uc);
goto out;
default:
TEST_FAIL("Unexpected guest exit\n");
}
}
out:
return;
}
static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
sync_global_to_guest(vm, ptimer_irq);
sync_global_to_guest(vm, vtimer_irq);
pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
}
static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
enum arch_timer timer)
{
*vm = vm_create_with_one_vcpu(vcpu, guest_code);
TEST_ASSERT(*vm, "Failed to create the test VM\n");
vm_init_descriptor_tables(*vm);
vm_install_exception_handler(*vm, VECTOR_IRQ_CURRENT,
guest_irq_handler);
vcpu_init_descriptor_tables(*vcpu);
vcpu_args_set(*vcpu, 1, timer);
test_init_timer_irq(*vm, *vcpu);
vgic_v3_setup(*vm, 1, 64);
sync_global_to_guest(*vm, test_args);
}
static void test_print_help(char *name)
{
pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v]\n"
, name);
pr_info("\t-i: Number of iterations (default: %u)\n",
NR_TEST_ITERS_DEF);
pr_info("\t-b: Test both physical and virtual timers (default: true)\n");
pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n",
LONG_WAIT_TEST_MS);
pr_info("\t-l: Delta (in ms) used for wait times (default: %u)\n",
WAIT_TEST_MS);
pr_info("\t-p: Test physical timer (default: true)\n");
pr_info("\t-v: Test virtual timer (default: true)\n");
pr_info("\t-h: Print this help message\n");
}
static bool parse_args(int argc, char *argv[])
{
int opt;
while ((opt = getopt(argc, argv, "bhi:l:pvw:")) != -1) {
switch (opt) {
case 'b':
test_args.test_physical = true;
test_args.test_virtual = true;
break;
case 'i':
test_args.iterations =
atoi_positive("Number of iterations", optarg);
break;
case 'l':
test_args.long_wait_ms =
atoi_positive("Long wait time", optarg);
break;
case 'p':
test_args.test_physical = true;
test_args.test_virtual = false;
break;
case 'v':
test_args.test_virtual = true;
test_args.test_physical = false;
break;
case 'w':
test_args.wait_ms = atoi_positive("Wait time", optarg);
break;
case 'h':
default:
goto err;
}
}
return true;
err:
test_print_help(argv[0]);
return false;
}
int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
if (!parse_args(argc, argv))
exit(KSFT_SKIP);
if (test_args.test_virtual) {
test_vm_create(&vm, &vcpu, VIRTUAL);
test_run(vm, vcpu);
kvm_vm_free(vm);
}
if (test_args.test_physical) {
test_vm_create(&vm, &vcpu, PHYSICAL);
test_run(vm, vcpu);
kvm_vm_free(vm);
}
return 0;
}
// SPDX-License-Identifier: GPL-2.0
// Check that, on a GICv3 system, not configuring GICv3 correctly
// results in all of the sysregs generating an UNDEF exception.
#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>
static volatile bool handled;
#define __check_sr_read(r) \
({ \
uint64_t val; \
\
handled = false; \
dsb(sy); \
val = read_sysreg_s(SYS_ ## r); \
val; \
})
#define __check_sr_write(r) \
do { \
handled = false; \
dsb(sy); \
write_sysreg_s(0, SYS_ ## r); \
isb(); \
} while(0)
/* Fatal checks */
#define check_sr_read(r) \
do { \
__check_sr_read(r); \
__GUEST_ASSERT(handled, #r " no read trap"); \
} while(0)
#define check_sr_write(r) \
do { \
__check_sr_write(r); \
__GUEST_ASSERT(handled, #r " no write trap"); \
} while(0)
#define check_sr_rw(r) \
do { \
check_sr_read(r); \
check_sr_write(r); \
} while(0)
static void guest_code(void)
{
uint64_t val;
/*
* Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
* hidden the feature at runtime without any other userspace action.
*/
__GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC),
read_sysreg(id_aa64pfr0_el1)) == 0,
"GICv3 wrongly advertised");
/*
* Access all GICv3 registers, and fail if we don't get an UNDEF.
* Note that we happily access all the APxRn registers without
* checking their existance, as all we want to see is a failure.
*/
check_sr_rw(ICC_PMR_EL1);
check_sr_read(ICC_IAR0_EL1);
check_sr_write(ICC_EOIR0_EL1);
check_sr_rw(ICC_HPPIR0_EL1);
check_sr_rw(ICC_BPR0_EL1);
check_sr_rw(ICC_AP0R0_EL1);
check_sr_rw(ICC_AP0R1_EL1);
check_sr_rw(ICC_AP0R2_EL1);
check_sr_rw(ICC_AP0R3_EL1);
check_sr_rw(ICC_AP1R0_EL1);
check_sr_rw(ICC_AP1R1_EL1);
check_sr_rw(ICC_AP1R2_EL1);
check_sr_rw(ICC_AP1R3_EL1);
check_sr_write(ICC_DIR_EL1);
check_sr_read(ICC_RPR_EL1);
check_sr_write(ICC_SGI1R_EL1);
check_sr_write(ICC_ASGI1R_EL1);
check_sr_write(ICC_SGI0R_EL1);
check_sr_read(ICC_IAR1_EL1);
check_sr_write(ICC_EOIR1_EL1);
check_sr_rw(ICC_HPPIR1_EL1);
check_sr_rw(ICC_BPR1_EL1);
check_sr_rw(ICC_CTLR_EL1);
check_sr_rw(ICC_IGRPEN0_EL1);
check_sr_rw(ICC_IGRPEN1_EL1);
/*
* ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can
* be RAO/WI. Engage in non-fatal accesses, starting with a
* write of 0 to try and disable SRE, and let's see if it
* sticks.
*/
__check_sr_write(ICC_SRE_EL1);
if (!handled)
GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n");
val = __check_sr_read(ICC_SRE_EL1);
if (!handled) {
__GUEST_ASSERT((val & BIT(0)),
"ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n");
GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n");
}
GUEST_DONE();
}
static void guest_undef_handler(struct ex_regs *regs)
{
/* Success, we've gracefully exploded! */
handled = true;
regs->pc += 4;
}
static void test_run_vcpu(struct kvm_vcpu *vcpu)
{
struct ucall uc;
do {
vcpu_run(vcpu);
switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
REPORT_GUEST_ASSERT(uc);
break;
case UCALL_PRINTF:
printf("%s", uc.buffer);
break;
case UCALL_DONE:
break;
default:
TEST_FAIL("Unknown ucall %lu", uc.cmd);
}
} while (uc.cmd != UCALL_DONE);
}
static void test_guest_no_gicv3(void)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
/* Create a VM without a GICv3 */
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vcpu);
vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
ESR_EC_UNKNOWN, guest_undef_handler);
test_run_vcpu(vcpu);
kvm_vm_free(vm);
}
int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
uint64_t pfr0;
vm = vm_create_with_one_vcpu(&vcpu, NULL);
vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &pfr0);
__TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0),
"GICv3 not supported.");
kvm_vm_free(vm);
test_guest_no_gicv3();
return 0;
}
......@@ -126,6 +126,7 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
......
......@@ -269,13 +269,12 @@ static void guest_inject(struct test_args *args,
KVM_INJECT_MULTI(cmd, first_intid, num);
while (irq_handled < num) {
asm volatile("wfi\n"
"msr daifclr, #2\n"
/* handle IRQ */
"msr daifset, #2\n"
: : : "memory");
wfi();
local_irq_enable();
isb(); /* handle IRQ */
local_irq_disable();
}
asm volatile("msr daifclr, #2" : : : "memory");
local_irq_enable();
GUEST_ASSERT_EQ(irq_handled, num);
for (i = first_intid; i < num + first_intid; i++)
......
......@@ -79,7 +79,7 @@ static inline uint64_t timer_get_cval(enum arch_timer timer)
return 0;
}
static inline void timer_set_tval(enum arch_timer timer, uint32_t tval)
static inline void timer_set_tval(enum arch_timer timer, int32_t tval)
{
switch (timer) {
case VIRTUAL:
......@@ -95,6 +95,22 @@ static inline void timer_set_tval(enum arch_timer timer, uint32_t tval)
isb();
}
static inline int32_t timer_get_tval(enum arch_timer timer)
{
isb();
switch (timer) {
case VIRTUAL:
return read_sysreg(cntv_tval_el0);
case PHYSICAL:
return read_sysreg(cntp_tval_el0);
default:
GUEST_FAIL("Could not get timer %d\n", timer);
}
/* We should not reach here */
return 0;
}
static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
{
switch (timer) {
......
......@@ -243,4 +243,7 @@ void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
uint64_t arg6, struct arm_smccc_res *res);
/* Execute a Wait For Interrupt instruction. */
void wfi(void);
#endif /* SELFTEST_KVM_PROCESSOR_H */
......@@ -639,3 +639,9 @@ void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
sparsebit_set_num(vm->vpages_valid, 0,
(1ULL << vm->va_bits) >> vm->page_shift);
}
/* Helper to call wfi instruction. */
void wfi(void)
{
asm volatile("wfi");
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment