Commit 714797c9 authored by Paolo Bonzini's avatar Paolo Bonzini

Merge tag 'kvmarm-5.18' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm64 updates for 5.18

- Proper emulation of the OSLock feature of the debug architecture

- Scalability improvements for the MMU lock when dirty logging is on

- New VMID allocator, which will eventually help with SVA in VMs

- Better support for PMUs in heterogeneous systems

- PSCI 1.1 support, enabling support for SYSTEM_RESET2

- Implement CONFIG_DEBUG_LIST at EL2

- Make CONFIG_ARM64_ERRATUM_2077057 default y

- Reduce the overhead of VM exit when no interrupt is pending

- Remove traces of 32bit ARM host support from the documentation

- Updated vgic selftests

- Various cleanups, doc updates and spelling fixes
parents cf501981 21ea4578
This diff is collapsed.
......@@ -70,7 +70,7 @@ irqchip.
-ENODEV PMUv3 not supported or GIC not initialized
-ENXIO PMUv3 not properly configured or in-kernel irqchip not
configured as required prior to calling this attribute
-EBUSY PMUv3 already initialized
-EBUSY PMUv3 already initialized or a VCPU has already run
-EINVAL Invalid filter range
======= ======================================================
......@@ -104,11 +104,43 @@ hardware event. Filtering event 0x1E (CHAIN) has no effect either, as it
isn't strictly speaking an event. Filtering the cycle counter is possible
using event 0x11 (CPU_CYCLES).
1.4 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_SET_PMU
------------------------------------------
:Parameters: in kvm_device_attr.addr the address to an int representing the PMU
identifier.
:Returns:
======= ====================================================
-EBUSY PMUv3 already initialized, a VCPU has already run or
an event filter has already been set
-EFAULT Error accessing the PMU identifier
-ENXIO PMU not found
-ENODEV PMUv3 not supported or GIC not initialized
-ENOMEM Could not allocate memory
======= ====================================================
Request that the VCPU uses the specified hardware PMU when creating guest events
for the purpose of PMU emulation. The PMU identifier can be read from the "type"
file for the desired PMU instance under /sys/devices (or, equivalently,
/sys/bus/event_source). This attribute is particularly useful on heterogeneous
systems where there are at least two CPU PMUs on the system. The PMU that is set
for one VCPU will be used by all the other VCPUs. It isn't possible to set a PMU
if a PMU event filter is already present.
Note that KVM will not make any attempts to run the VCPU on the physical CPUs
associated with the PMU specified by this attribute. This is entirely left to
userspace. However, attempting to run the VCPU on a physical CPU not supported
by the PMU will fail and KVM_RUN will return with
exit_reason = KVM_EXIT_FAIL_ENTRY and populate the fail_entry struct by setting
hardware_entry_failure_reason field to KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED and
the cpu field to the processor id.
2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
=================================
:Architectures: ARM, ARM64
:Architectures: ARM64
2.1. ATTRIBUTES: KVM_ARM_VCPU_TIMER_IRQ_VTIMER, KVM_ARM_VCPU_TIMER_IRQ_PTIMER
-----------------------------------------------------------------------------
......
......@@ -682,6 +682,7 @@ config ARM64_ERRATUM_2051678
config ARM64_ERRATUM_2077057
bool "Cortex-A510: 2077057: workaround software-step corrupting SPSR_EL2"
default y
help
This option adds the workaround for ARM Cortex-A510 erratum 2077057.
Affected Cortex-A510 may corrupt SPSR_EL2 when a step exception is
......
......@@ -50,6 +50,8 @@
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)
#define KVM_HAVE_MMU_RWLOCK
/*
* Mode of operation configurable with kvm-arm.mode early param.
* See Documentation/admin-guide/kernel-parameters.txt for more information.
......@@ -71,9 +73,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
struct kvm_vmid {
/* The VMID generation used for the virt. memory system */
u64 vmid_gen;
u32 vmid;
atomic64_t id;
};
struct kvm_s2_mmu {
......@@ -122,20 +122,24 @@ struct kvm_arch {
* should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is
* supported.
*/
bool return_nisv_io_abort_to_user;
#define KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER 0
/* Memory Tagging Extension enabled for the guest */
#define KVM_ARCH_FLAG_MTE_ENABLED 1
/* At least one vCPU has run in the VM */
#define KVM_ARCH_FLAG_HAS_RAN_ONCE 2
unsigned long flags;
/*
* VM-wide PMU filter, implemented as a bitmap and big enough for
* up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+).
*/
unsigned long *pmu_filter;
unsigned int pmuver;
struct arm_pmu *arm_pmu;
cpumask_var_t supported_cpus;
u8 pfr0_csv2;
u8 pfr0_csv3;
/* Memory Tagging Extension enabled for the guest */
bool mte_enabled;
};
struct kvm_vcpu_fault_info {
......@@ -171,6 +175,7 @@ enum vcpu_sysreg {
PAR_EL1, /* Physical Address Register */
MDSCR_EL1, /* Monitor Debug System Control Register */
MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */
OSLSR_EL1, /* OS Lock Status Register */
DISR_EL1, /* Deferred Interrupt Status Register */
/* Performance Monitors Registers */
......@@ -435,6 +440,7 @@ struct kvm_vcpu_arch {
#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */
#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
#define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14)
#define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
KVM_GUESTDBG_USE_SW_BP | \
......@@ -453,6 +459,15 @@ struct kvm_vcpu_arch {
#define vcpu_has_ptrauth(vcpu) false
#endif
/*
 * Accessors for the KVM_ARM64_ON_UNSUPPORTED_CPU bit in vcpu->arch.flags:
 * test it, set it, and clear it, respectively. The test form yields the
 * masked flag value (non-zero when set), not a normalised 0/1.
 */
#define vcpu_on_unsupported_cpu(vcpu) \
((vcpu)->arch.flags & KVM_ARM64_ON_UNSUPPORTED_CPU)
#define vcpu_set_on_unsupported_cpu(vcpu) \
((vcpu)->arch.flags |= KVM_ARM64_ON_UNSUPPORTED_CPU)
#define vcpu_clear_on_unsupported_cpu(vcpu) \
((vcpu)->arch.flags &= ~KVM_ARM64_ON_UNSUPPORTED_CPU)
#define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs)
/*
......@@ -692,6 +707,12 @@ int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
extern unsigned int kvm_arm_vmid_bits;
int kvm_arm_vmid_alloc_init(void);
void kvm_arm_vmid_alloc_free(void);
void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
void kvm_arm_vmid_clear_active(void);
static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
{
vcpu_arch->steal.base = GPA_INVALID;
......@@ -725,6 +746,10 @@ void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu);
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
/*
 * Evaluates to 1 when SYS_OSLSR_OSLK is set in the vCPU's OSLSR_EL1 value as
 * read through __vcpu_sys_reg(), and to 0 otherwise (the '!!' normalises the
 * masked value).
 */
#define kvm_vcpu_os_lock_enabled(vcpu) \
(!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & SYS_OSLSR_OSLK))
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
......@@ -786,7 +811,9 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
#define kvm_arm_vcpu_sve_finalized(vcpu) \
((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
#define kvm_has_mte(kvm) (system_supports_mte() && (kvm)->arch.mte_enabled)
#define kvm_has_mte(kvm) \
(system_supports_mte() && \
test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags))
#define kvm_vcpu_has_pmu(vcpu) \
(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
......
......@@ -115,6 +115,7 @@ alternative_cb_end
#include <asm/cache.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
#include <asm/kvm_host.h>
void kvm_update_va_mask(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
......@@ -266,7 +267,8 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0;
baddr = mmu->pgd_phys;
vmid_field = (u64)READ_ONCE(vmid->vmid) << VTTBR_VMID_SHIFT;
vmid_field = atomic64_read(&vmid->id) << VTTBR_VMID_SHIFT;
vmid_field &= VTTBR_VMID_MASK(kvm_arm_vmid_bits);
return kvm_phys_to_vttbr(baddr) | vmid_field | cnp;
}
......
......@@ -128,8 +128,16 @@
#define SYS_DBGWVRn_EL1(n) sys_reg(2, 0, 0, n, 6)
#define SYS_DBGWCRn_EL1(n) sys_reg(2, 0, 0, n, 7)
#define SYS_MDRAR_EL1 sys_reg(2, 0, 1, 0, 0)
#define SYS_OSLAR_EL1 sys_reg(2, 0, 1, 0, 4)
#define SYS_OSLAR_OSLK BIT(0)
#define SYS_OSLSR_EL1 sys_reg(2, 0, 1, 1, 4)
#define SYS_OSLSR_OSLM_MASK (BIT(3) | BIT(0))
#define SYS_OSLSR_OSLM_NI 0
#define SYS_OSLSR_OSLM_IMPLEMENTED BIT(3)
#define SYS_OSLSR_OSLK BIT(1)
#define SYS_OSDLR_EL1 sys_reg(2, 0, 1, 3, 4)
#define SYS_DBGPRCR_EL1 sys_reg(2, 0, 1, 4, 4)
#define SYS_DBGCLAIMSET_EL1 sys_reg(2, 0, 7, 8, 6)
......
......@@ -362,6 +362,7 @@ struct kvm_arm_copy_mte_tags {
#define KVM_ARM_VCPU_PMU_V3_IRQ 0
#define KVM_ARM_VCPU_PMU_V3_INIT 1
#define KVM_ARM_VCPU_PMU_V3_FILTER 2
#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
......@@ -413,6 +414,16 @@ struct kvm_arm_copy_mte_tags {
#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
/* arm64-specific kvm_run::system_event flags */
/*
* Reset caused by a PSCI v1.1 SYSTEM_RESET2 call.
* Valid only when the system event has a type of KVM_SYSTEM_EVENT_RESET.
*/
#define KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (1ULL << 0)
/* run->fail_entry.hardware_entry_failure_reason codes. */
#define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0)
#endif
#endif /* __ARM_KVM_H__ */
......@@ -348,7 +348,13 @@ static void task_fpsimd_load(void)
/*
* Ensure FPSIMD/SVE storage in memory for the loaded context is up to
* date with respect to the CPU registers.
* date with respect to the CPU registers. Note carefully that the
* current context is the context last bound to the CPU stored in
* last; if KVM is involved, this may be the guest VM context rather
* than the host thread for the VM pointed to by current. This means
* that we must always reference the state storage via last rather
* than via current, other than the TIF_ flags which KVM will
* carefully maintain for us.
*/
static void fpsimd_save(void)
{
......
......@@ -79,6 +79,9 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors);
/* Kernel symbol used by icache_is_vpipt(). */
KVM_NVHE_ALIAS(__icache_flags);
/* VMID bits set by the KVM VMID allocator */
KVM_NVHE_ALIAS(kvm_arm_vmid_bits);
/* Kernel symbols needed for cpus_have_final/const_caps checks. */
KVM_NVHE_ALIAS(arm64_const_caps_ready);
KVM_NVHE_ALIAS(cpu_hwcap_keys);
......
......@@ -14,7 +14,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o \
vgic-sys-reg-v3.o fpsimd.o pmu.o pkvm.o \
arch_timer.o trng.o\
arch_timer.o trng.o vmid.o \
vgic/vgic.o vgic/vgic-init.o \
vgic/vgic-irqfd.o vgic/vgic-v2.o \
vgic/vgic-v3.o vgic/vgic-v4.o \
......
......@@ -53,11 +53,6 @@ static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
/* The VMID used in the VTTBR */
static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
static u32 kvm_next_vmid;
static DEFINE_SPINLOCK(kvm_vmid_lock);
static bool vgic_present;
static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
......@@ -89,7 +84,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
switch (cap->cap) {
case KVM_CAP_ARM_NISV_TO_USER:
r = 0;
kvm->arch.return_nisv_io_abort_to_user = true;
set_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
&kvm->arch.flags);
break;
case KVM_CAP_ARM_MTE:
mutex_lock(&kvm->lock);
......@@ -97,7 +93,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = -EINVAL;
} else {
r = 0;
kvm->arch.mte_enabled = true;
set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
}
mutex_unlock(&kvm->lock);
break;
......@@ -150,6 +146,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
goto out_free_stage2_pgd;
if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL))
goto out_free_stage2_pgd;
cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask);
kvm_vgic_early_init(kvm);
/* The maximum number of VCPUs is limited by the host's GIC model */
......@@ -176,6 +176,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
bitmap_free(kvm->arch.pmu_filter);
free_cpumask_var(kvm->arch.supported_cpus);
kvm_vgic_destroy(kvm);
......@@ -411,6 +412,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (vcpu_has_ptrauth(vcpu))
vcpu_ptrauth_disable(vcpu);
kvm_arch_vcpu_load_debug_state_flags(vcpu);
if (!cpumask_test_cpu(smp_processor_id(), vcpu->kvm->arch.supported_cpus))
vcpu_set_on_unsupported_cpu(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
......@@ -422,7 +426,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_timer_vcpu_put(vcpu);
kvm_vgic_put(vcpu);
kvm_vcpu_pmu_restore_host(vcpu);
kvm_arm_vmid_clear_active();
vcpu_clear_on_unsupported_cpu(vcpu);
vcpu->cpu = -1;
}
......@@ -489,87 +495,6 @@ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
}
#endif
/*
 * Just ensure a guest exit from a particular CPU.
 *
 * The body is intentionally empty and @info is unused; force_vm_exit()
 * passes this function to smp_call_function_many() as the cross-CPU callback.
 */
static void exit_vm_noop(void *info)
{
}
/*
 * force_vm_exit - run the empty exit_vm_noop() callback on the CPUs in @mask
 * @mask: CPUs to target
 *
 * The callback is dispatched with smp_call_function_many(), with NULL as its
 * info argument and 'true' as the final (wait) argument. Preemption is
 * disabled for the duration of the call.
 */
void force_vm_exit(const cpumask_t *mask)
{
preempt_disable();
smp_call_function_many(mask, exit_vm_noop, NULL, true);
preempt_enable();
}
/**
 * need_new_vmid_gen - check that the VMID is still valid
 * @vmid: The VMID to check
 *
 * Return: true if @vmid->vmid_gen differs from the current value of the
 * global kvm_vmid_gen counter, i.e. a new generation of VMIDs is being used;
 * false otherwise.
 *
 * The hardware supports a limited set of values with the value zero reserved
 * for the host, so we check if an assigned value belongs to a previous
 * generation, which requires us to assign a new value. If we're the first to
 * use a VMID for the new generation, we must flush necessary caches and TLBs
 * on all CPUs.
 */
static bool need_new_vmid_gen(struct kvm_vmid *vmid)
{
u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
smp_rmb(); /* Orders read of kvm_vmid_gen and vmid->vmid_gen */
return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
}
/**
 * update_vmid - Update the vmid with a valid VMID for the current generation
 * @vmid: The stage-2 VMID information struct
 *
 * Returns immediately if @vmid already belongs to the current generation.
 * Otherwise takes kvm_vmid_lock, re-checks, and assigns the next free VMID
 * from kvm_next_vmid, starting a new generation first if the allocator is
 * at zero.
 */
static void update_vmid(struct kvm_vmid *vmid)
{
/* Lockless fast path: nothing to do if the generation still matches. */
if (!need_new_vmid_gen(vmid))
return;
spin_lock(&kvm_vmid_lock);
/*
* We need to re-check the vmid_gen here to ensure that if another vcpu
* already allocated a valid vmid for this vm, then this vcpu should
* use the same vmid.
*/
if (!need_new_vmid_gen(vmid)) {
spin_unlock(&kvm_vmid_lock);
return;
}
/*
 * First user of a new VMID generation? kvm_next_vmid is zero after the
 * masking below has wrapped it; in that case bump the generation and
 * restart allocation at 1.
 */
if (unlikely(kvm_next_vmid == 0)) {
atomic64_inc(&kvm_vmid_gen);
kvm_next_vmid = 1;
/*
* On SMP we know no other CPUs can use this CPU's or each
* other's VMID after force_vm_exit returns since the
* kvm_vmid_lock blocks them from reentry to the guest.
*/
force_vm_exit(cpu_all_mask);
/*
* Now broadcast TLB + ICACHE invalidation over the inner
* shareable domain to make sure all data structures are
* clean.
*/
kvm_call_hyp(__kvm_flush_vm_context);
}
/*
 * Hand out the next VMID, then advance the allocator. The mask keeps
 * kvm_next_vmid within kvm_get_vmid_bits() bits, so it wraps to zero
 * once the range is exhausted.
 */
WRITE_ONCE(vmid->vmid, kvm_next_vmid);
kvm_next_vmid++;
kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
/* Order the vmid store above before the vmid_gen store below. */
smp_wmb();
WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));
spin_unlock(&kvm_vmid_lock);
}
static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
{
return vcpu->arch.target >= 0;
......@@ -634,6 +559,10 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
if (kvm_vm_is_protected(kvm))
kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu);
mutex_lock(&kvm->lock);
set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags);
mutex_unlock(&kvm->lock);
return ret;
}
......@@ -792,8 +721,15 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret)
}
}
if (unlikely(vcpu_on_unsupported_cpu(vcpu))) {
run->exit_reason = KVM_EXIT_FAIL_ENTRY;
run->fail_entry.hardware_entry_failure_reason = KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED;
run->fail_entry.cpu = smp_processor_id();
*ret = 0;
return true;
}
return kvm_request_pending(vcpu) ||
need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) ||
xfer_to_guest_mode_work_pending();
}
......@@ -855,8 +791,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (!ret)
ret = 1;
update_vmid(&vcpu->arch.hw_mmu->vmid);
check_vcpu_requests(vcpu);
/*
......@@ -866,6 +800,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
preempt_disable();
/*
* The VMID allocator only tracks active VMIDs per
* physical CPU, and therefore the VMID allocated may not be
* preserved on VMID roll-over if the task was preempted,
* making a thread's VMID inactive. So we need to call
* kvm_arm_vmid_update() in non-preemptible context.
*/
kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid);
kvm_pmu_flush_hwstate(vcpu);
local_irq_disable();
......@@ -945,9 +888,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* context synchronization event) is necessary to ensure that
* pending interrupts are taken.
*/
if (ARM_EXCEPTION_CODE(ret) == ARM_EXCEPTION_IRQ) {
local_irq_enable();
isb();
local_irq_disable();
}
guest_timing_exit_irqoff();
......@@ -1745,7 +1690,7 @@ static void init_cpu_logical_map(void)
/*
* Copy the MPIDR <-> logical CPU ID mapping to hyp.
* Only copy the set of online CPUs whose features have been chacked
* Only copy the set of online CPUs whose features have been checked
* against the finalized system capabilities. The hypervisor will not
* allow any other CPUs from the `possible` set to boot.
*/
......@@ -2161,6 +2106,12 @@ int kvm_arch_init(void *opaque)
if (err)
return err;
err = kvm_arm_vmid_alloc_init();
if (err) {
kvm_err("Failed to initialize VMID allocator.\n");
return err;
}
if (!in_hyp_mode) {
err = init_hyp_mode();
if (err)
......@@ -2200,6 +2151,7 @@ int kvm_arch_init(void *opaque)
if (!in_hyp_mode)
teardown_hyp_mode();
out_err:
kvm_arm_vmid_alloc_free();
return err;
}
......
......@@ -105,9 +105,11 @@ static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu)
* - Userspace is using the hardware to debug the guest
* (KVM_GUESTDBG_USE_HW is set).
* - The guest is not using debug (KVM_ARM64_DEBUG_DIRTY is clear).
* - The guest has enabled the OS Lock (debug exceptions are blocked).
*/
if ((vcpu->guest_debug & KVM_GUESTDBG_USE_HW) ||
!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) ||
kvm_vcpu_os_lock_enabled(vcpu))
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
......@@ -160,8 +162,8 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
kvm_arm_setup_mdcr_el2(vcpu);
/* Is Guest debugging in effect? */
if (vcpu->guest_debug) {
/* Check if we need to use the debug registers. */
if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) {
/* Save guest debug state */
save_guest_debug_regs(vcpu);
......@@ -223,6 +225,19 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
&vcpu->arch.debug_ptr->dbg_wcr[0],
&vcpu->arch.debug_ptr->dbg_wvr[0]);
/*
* The OS Lock blocks debug exceptions in all ELs when it is
* enabled. If the guest has enabled the OS Lock, constrain its
* effects to the guest. Emulate the behavior by clearing
* MDSCR_EL1.MDE. In so doing, we ensure that host debug
* exceptions are unaffected by guest configuration of the OS
* Lock.
*/
} else if (kvm_vcpu_os_lock_enabled(vcpu)) {
mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
mdscr &= ~DBG_MDSCR_MDE;
vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
}
}
......@@ -244,7 +259,10 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
{
trace_kvm_arm_clear_debug(vcpu->guest_debug);
if (vcpu->guest_debug) {
/*
* Restore the guest's debug registers if we were using them.
*/
if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) {
restore_guest_debug_regs(vcpu);
/*
......
......@@ -84,6 +84,11 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
}
/*
* Called just before entering the guest once we are no longer
* preemptable. Syncs the host's TIF_FOREIGN_FPSTATE with the KVM
* mirror of the flag used by the hypervisor.
*/
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
{
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
......@@ -93,10 +98,11 @@ void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
}
/*
* If the guest FPSIMD state was loaded, update the host's context
* tracking data mark the CPU FPSIMD regs as dirty and belonging to vcpu
* so that they will be written back if the kernel clobbers them due to
* kernel-mode NEON before re-entry into the guest.
* Called just after exiting the guest. If the guest FPSIMD state
* was loaded, update the host's context tracking data to mark the CPU
* FPSIMD regs as dirty and belonging to vcpu so that they will be
* written back if the kernel clobbers them due to kernel-mode NEON
* before re-entry into the guest.
*/
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
{
......
......@@ -282,7 +282,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
break;
/*
* Otherwide, this is a priviledged mode, and *all* the
* Otherwise, this is a privileged mode, and *all* the
* registers must be narrowed to 32bit.
*/
default:
......
......@@ -248,7 +248,7 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
case ARM_EXCEPTION_HYP_GONE:
/*
* EL2 has been reset to the hyp-stub. This happens when a guest
* is pre-empted by kvm_reboot()'s shutdown call.
* is pre-empted by kvm_reboot()'s shutdown call.
*/
run->exit_reason = KVM_EXIT_FAIL_ENTRY;
return 0;
......
......@@ -173,6 +173,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
/* Valid trap. Switch the context: */
/* First disable enough traps to allow us to update the registers */
if (has_vhe()) {
reg = CPACR_EL1_FPEN;
if (sve_guest)
......@@ -188,11 +190,13 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
}
isb();
/* Write out the host state if it's in the registers */
if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
}
/* Restore the guest state */
if (sve_guest)
__hyp_sve_restore_guest(vcpu);
else
......
......@@ -13,10 +13,11 @@ lib-objs := clear_page.o copy_page.o memcpy.o memset.o
lib-objs := $(addprefix ../../../lib/, $(lib-objs))
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \
hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
obj-$(CONFIG_DEBUG_LIST) += list_debug.o
obj-y += $(lib-objs)
##
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2022 - Google LLC
* Author: Keir Fraser <keirf@google.com>
*/
#include <linux/list.h>
#include <linux/bug.h>
/*
 * Identity helper: returns @v unchanged. It carries the __must_check
 * annotation, and NVHE_CHECK_DATA_CORRUPTION() routes its result through
 * this function.
 */
static inline __must_check bool nvhe_check_data_corruption(bool v)
{
return v;
}
/*
 * Evaluate @condition once and yield it as a bool. When it is true, report
 * the problem first: BUG_ON(1) if CONFIG_BUG_ON_DATA_CORRUPTION is enabled,
 * WARN_ON(1) otherwise. The resulting value is handed back through
 * nvhe_check_data_corruption().
 */
#define NVHE_CHECK_DATA_CORRUPTION(condition)				\
	nvhe_check_data_corruption(({					\
		bool corruption = unlikely(condition);			\
		if (corruption &&					\
		    IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) {	\
			BUG_ON(1);					\
		} else if (corruption) {				\
			WARN_ON(1);					\
		}							\
		corruption;						\
	}))
/* The predicates checked here are taken from lib/list_debug.c. */
/*
 * Check that inserting @new between @prev and @next looks sane.
 *
 * Returns false if @next->prev is not @prev, if @prev->next is not @next, or
 * if @new is the same node as @prev or @next; returns true otherwise. Each
 * predicate goes through NVHE_CHECK_DATA_CORRUPTION(), and evaluation stops
 * at the first one that fails.
 */
bool __list_add_valid(struct list_head *new, struct list_head *prev,
		      struct list_head *next)
{
	if (NVHE_CHECK_DATA_CORRUPTION(next->prev != prev))
		return false;

	if (NVHE_CHECK_DATA_CORRUPTION(prev->next != next))
		return false;

	if (NVHE_CHECK_DATA_CORRUPTION(new == prev || new == next))
		return false;

	return true;
}
/*
 * Check that @entry looks sane before it is unlinked.
 *
 * Returns false if @entry->next is LIST_POISON1, if @entry->prev is
 * LIST_POISON2, or if either neighbour does not point back at @entry;
 * returns true otherwise. Each predicate goes through
 * NVHE_CHECK_DATA_CORRUPTION(), and evaluation stops at the first failure,
 * so the neighbours are only dereferenced after the poison checks pass.
 */
bool __list_del_entry_valid(struct list_head *entry)
{
	struct list_head *prev = entry->prev;
	struct list_head *next = entry->next;

	if (NVHE_CHECK_DATA_CORRUPTION(next == LIST_POISON1))
		return false;

	if (NVHE_CHECK_DATA_CORRUPTION(prev == LIST_POISON2))
		return false;

	if (NVHE_CHECK_DATA_CORRUPTION(prev->next != entry))
		return false;

	if (NVHE_CHECK_DATA_CORRUPTION(next->prev != entry))
		return false;

	return true;
}
......@@ -138,8 +138,7 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
mmu->pgt = &host_kvm.pgt;
WRITE_ONCE(mmu->vmid.vmid_gen, 0);
WRITE_ONCE(mmu->vmid.vmid, 0);
atomic64_set(&mmu->vmid.id, 0);
return 0;
}
......
......@@ -102,7 +102,7 @@ static void __hyp_attach_page(struct hyp_pool *pool,
* Only the first struct hyp_page of a high-order page (otherwise known
* as the 'head') should have p->order set. The non-head pages should
* have p->order = HYP_NO_ORDER. Here @p may no longer be the head
* after coallescing, so make sure to mark it HYP_NO_ORDER proactively.
* after coalescing, so make sure to mark it HYP_NO_ORDER proactively.
*/
p->order = HYP_NO_ORDER;
for (; (order + 1) < pool->max_order; order++) {
......@@ -110,7 +110,7 @@ static void __hyp_attach_page(struct hyp_pool *pool,
if (!buddy)
break;
/* Take the buddy out of its list, and coallesce with @p */
/* Take the buddy out of its list, and coalesce with @p */
page_remove_from_list(buddy);
buddy->order = HYP_NO_ORDER;
p = min(p, buddy);
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Stubs for out-of-line function calls caused by re-using kernel
* infrastructure at EL2.
*
* Copyright (C) 2020 - Google LLC
*/
#include <linux/list.h>
#ifdef CONFIG_DEBUG_LIST
/*
 * Stub: performs no checks on @new, @prev or @next and always reports the
 * insertion as valid.
 */
bool __list_add_valid(struct list_head *new, struct list_head *prev,
struct list_head *next)
{
return true;
}
/*
 * Stub: performs no checks on @entry and always reports the removal as
 * valid.
 */
bool __list_del_entry_valid(struct list_head *entry)
{
return true;
}
#endif
......@@ -135,7 +135,8 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
* volunteered to do so, and bail out otherwise.
*/
if (!kvm_vcpu_dabt_isvalid(vcpu)) {
if (vcpu->kvm->arch.return_nisv_io_abort_to_user) {
if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
&vcpu->kvm->arch.flags)) {
run->exit_reason = KVM_EXIT_ARM_NISV;
run->arm_nisv.esr_iss = kvm_vcpu_dabt_iss_nisv_sanitized(vcpu);
run->arm_nisv.fault_ipa = fault_ipa;
......
......@@ -58,7 +58,7 @@ static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr,
break;
if (resched && next != end)
cond_resched_lock(&kvm->mmu_lock);
cond_resched_rwlock_write(&kvm->mmu_lock);
} while (addr = next, addr != end);
return ret;
......@@ -179,7 +179,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
phys_addr_t end = start + size;
assert_spin_locked(&kvm->mmu_lock);
lockdep_assert_held_write(&kvm->mmu_lock);
WARN_ON(size & ~PAGE_MASK);
WARN_ON(stage2_apply_range(kvm, start, end, kvm_pgtable_stage2_unmap,
may_block));
......@@ -213,13 +213,13 @@ static void stage2_flush_vm(struct kvm *kvm)
int idx, bkt;
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, bkt, slots)
stage2_flush_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
}
......@@ -615,7 +615,7 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
};
/**
* kvm_init_stage2_mmu - Initialise a S2 MMU strucrure
* kvm_init_stage2_mmu - Initialise a S2 MMU structure
* @kvm: The pointer to the KVM structure
* @mmu: The pointer to the s2 MMU structure
*
......@@ -653,7 +653,6 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
mmu->pgt = pgt;
mmu->pgd_phys = __pa(pgt->pgd);
WRITE_ONCE(mmu->vmid.vmid_gen, 0);
return 0;
out_destroy_pgtable:
......@@ -720,13 +719,13 @@ void stage2_unmap_vm(struct kvm *kvm)
idx = srcu_read_lock(&kvm->srcu);
mmap_read_lock(current->mm);
spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, bkt, slots)
stage2_unmap_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);
mmap_read_unlock(current->mm);
srcu_read_unlock(&kvm->srcu, idx);
}
......@@ -736,14 +735,14 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
struct kvm_pgtable *pgt = NULL;
spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);
pgt = mmu->pgt;
if (pgt) {
mmu->pgd_phys = 0;
mmu->pgt = NULL;
free_percpu(mmu->last_vcpu_ran);
}
spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);
if (pgt) {
kvm_pgtable_stage2_destroy(pgt);
......@@ -783,10 +782,10 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
if (ret)
break;
spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);
ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
&cache);
spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);
if (ret)
break;
......@@ -834,9 +833,9 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
start = memslot->base_gfn << PAGE_SHIFT;
end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);
stage2_wp_range(&kvm->arch.mmu, start, end);
spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);
kvm_flush_remote_tlbs(kvm);
}
......@@ -1080,6 +1079,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
gfn_t gfn;
kvm_pfn_t pfn;
bool logging_active = memslot_is_logging(memslot);
bool logging_perm_fault = false;
unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
unsigned long vma_pagesize, fault_granule;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
......@@ -1114,6 +1114,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (logging_active) {
force_pte = true;
vma_shift = PAGE_SHIFT;
logging_perm_fault = (fault_status == FSC_PERM && write_fault);
} else {
vma_shift = get_vma_page_shift(vma, hva);
}
......@@ -1212,7 +1213,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (exec_fault && device)
return -ENOEXEC;
spin_lock(&kvm->mmu_lock);
/*
* To reduce MMU contention and enhance concurrency during dirty
* logging, only acquire the read lock for permission
* relaxation.
*/
if (logging_perm_fault)
read_lock(&kvm->mmu_lock);
else
write_lock(&kvm->mmu_lock);
pgt = vcpu->arch.hw_mmu->pgt;
if (mmu_notifier_retry(kvm, mmu_seq))
goto out_unlock;
......@@ -1271,7 +1280,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
out_unlock:
spin_unlock(&kvm->mmu_lock);
if (logging_perm_fault)
read_unlock(&kvm->mmu_lock);
else
write_unlock(&kvm->mmu_lock);
kvm_set_pfn_accessed(pfn);
kvm_release_pfn_clean(pfn);
return ret != -EAGAIN ? ret : 0;
......@@ -1286,10 +1298,10 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
trace_kvm_access_fault(fault_ipa);
spin_lock(&vcpu->kvm->mmu_lock);
write_lock(&vcpu->kvm->mmu_lock);
mmu = vcpu->arch.hw_mmu;
kpte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
spin_unlock(&vcpu->kvm->mmu_lock);
write_unlock(&vcpu->kvm->mmu_lock);
pte = __pte(kpte);
if (pte_valid(pte))
......@@ -1692,9 +1704,9 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
phys_addr_t size = slot->npages << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);
unmap_stage2_range(&kvm->arch.mmu, gpa, size);
spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);
}
/*
......
......@@ -7,6 +7,7 @@
#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
......@@ -16,6 +17,9 @@
DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
static LIST_HEAD(arm_pmus);
static DEFINE_MUTEX(arm_pmus_lock);
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
......@@ -24,7 +28,11 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
switch (kvm->arch.pmuver) {
unsigned int pmuver;
pmuver = kvm->arch.arm_pmu->pmuver;
switch (pmuver) {
case ID_AA64DFR0_PMUVER_8_0:
return GENMASK(9, 0);
case ID_AA64DFR0_PMUVER_8_1:
......@@ -33,7 +41,7 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
case ID_AA64DFR0_PMUVER_8_7:
return GENMASK(15, 0);
default: /* Shouldn't be here, just for sanity */
WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
return 0;
}
}
......@@ -600,6 +608,7 @@ static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
*/
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
{
struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc;
struct perf_event *event;
......@@ -636,7 +645,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
return;
memset(&attr, 0, sizeof(struct perf_event_attr));
attr.type = PERF_TYPE_RAW;
attr.type = arm_pmu->pmu.type;
attr.size = sizeof(attr);
attr.pinned = 1;
attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
......@@ -745,17 +754,33 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
void kvm_host_pmu_init(struct arm_pmu *pmu)
{
if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF &&
!kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled())
struct arm_pmu_entry *entry;
if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF ||
is_protected_kvm_enabled())
return;
mutex_lock(&arm_pmus_lock);
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out_unlock;
entry->arm_pmu = pmu;
list_add_tail(&entry->entry, &arm_pmus);
if (list_is_singular(&arm_pmus))
static_branch_enable(&kvm_arm_pmu_available);
out_unlock:
mutex_unlock(&arm_pmus_lock);
}
static int kvm_pmu_probe_pmuver(void)
static struct arm_pmu *kvm_pmu_probe_armpmu(void)
{
struct perf_event_attr attr = { };
struct perf_event *event;
struct arm_pmu *pmu;
int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF;
struct arm_pmu *pmu = NULL;
/*
* Create a dummy event that only counts user cycles. As we'll never
......@@ -780,19 +805,20 @@ static int kvm_pmu_probe_pmuver(void)
if (IS_ERR(event)) {
pr_err_once("kvm: pmu event creation failed %ld\n",
PTR_ERR(event));
return ID_AA64DFR0_PMUVER_IMP_DEF;
return NULL;
}
if (event->pmu) {
pmu = to_arm_pmu(event->pmu);
if (pmu->pmuver)
pmuver = pmu->pmuver;
if (pmu->pmuver == 0 ||
pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
pmu = NULL;
}
perf_event_disable(event);
perf_event_release_kernel(event);
return pmuver;
return pmu;
}
u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
......@@ -810,7 +836,7 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
* Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
* as RAZ
*/
if (vcpu->kvm->arch.pmuver >= ID_AA64DFR0_PMUVER_8_4)
if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_4)
val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
base = 32;
}
......@@ -922,26 +948,64 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
return true;
}
static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
{
struct kvm *kvm = vcpu->kvm;
struct arm_pmu_entry *entry;
struct arm_pmu *arm_pmu;
int ret = -ENXIO;
mutex_lock(&kvm->lock);
mutex_lock(&arm_pmus_lock);
list_for_each_entry(entry, &arm_pmus, entry) {
arm_pmu = entry->arm_pmu;
if (arm_pmu->pmu.type == pmu_id) {
if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) ||
(kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
ret = -EBUSY;
break;
}
kvm->arch.arm_pmu = arm_pmu;
cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
ret = 0;
break;
}
}
mutex_unlock(&arm_pmus_lock);
mutex_unlock(&kvm->lock);
return ret;
}
int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
struct kvm *kvm = vcpu->kvm;
if (!kvm_vcpu_has_pmu(vcpu))
return -ENODEV;
if (vcpu->arch.pmu.created)
return -EBUSY;
if (!vcpu->kvm->arch.pmuver)
vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();
if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
mutex_lock(&kvm->lock);
if (!kvm->arch.arm_pmu) {
/* No PMU set, get the default one */
kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
if (!kvm->arch.arm_pmu) {
mutex_unlock(&kvm->lock);
return -ENODEV;
}
}
mutex_unlock(&kvm->lock);
switch (attr->attr) {
case KVM_ARM_VCPU_PMU_V3_IRQ: {
int __user *uaddr = (int __user *)(long)attr->addr;
int irq;
if (!irqchip_in_kernel(vcpu->kvm))
if (!irqchip_in_kernel(kvm))
return -EINVAL;
if (get_user(irq, uaddr))
......@@ -951,7 +1015,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
return -EINVAL;
if (!pmu_irq_is_valid(vcpu->kvm, irq))
if (!pmu_irq_is_valid(kvm, irq))
return -EINVAL;
if (kvm_arm_pmu_irq_initialized(vcpu))
......@@ -966,7 +1030,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
struct kvm_pmu_event_filter filter;
int nr_events;
nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
nr_events = kvm_pmu_event_mask(kvm) + 1;
uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
......@@ -978,12 +1042,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
filter.action != KVM_PMU_EVENT_DENY))
return -EINVAL;
mutex_lock(&vcpu->kvm->lock);
mutex_lock(&kvm->lock);
if (!vcpu->kvm->arch.pmu_filter) {
vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
if (!vcpu->kvm->arch.pmu_filter) {
mutex_unlock(&vcpu->kvm->lock);
if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) {
mutex_unlock(&kvm->lock);
return -EBUSY;
}
if (!kvm->arch.pmu_filter) {
kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
if (!kvm->arch.pmu_filter) {
mutex_unlock(&kvm->lock);
return -ENOMEM;
}
......@@ -994,20 +1063,29 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
* events, the default is to allow.
*/
if (filter.action == KVM_PMU_EVENT_ALLOW)
bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
bitmap_zero(kvm->arch.pmu_filter, nr_events);
else
bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
bitmap_fill(kvm->arch.pmu_filter, nr_events);
}
if (filter.action == KVM_PMU_EVENT_ALLOW)
bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
else
bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
mutex_unlock(&vcpu->kvm->lock);
mutex_unlock(&kvm->lock);
return 0;
}
case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
int __user *uaddr = (int __user *)(long)attr->addr;
int pmu_id;
if (get_user(pmu_id, uaddr))
return -EFAULT;
return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
}
case KVM_ARM_VCPU_PMU_V3_INIT:
return kvm_arm_pmu_v3_init(vcpu);
}
......@@ -1045,6 +1123,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
case KVM_ARM_VCPU_PMU_V3_IRQ:
case KVM_ARM_VCPU_PMU_V3_INIT:
case KVM_ARM_VCPU_PMU_V3_FILTER:
case KVM_ARM_VCPU_PMU_V3_SET_PMU:
if (kvm_vcpu_has_pmu(vcpu))
return 0;
}
......
......@@ -84,7 +84,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
if (!vcpu)
return PSCI_RET_INVALID_PARAMS;
if (!vcpu->arch.power_off) {
if (kvm_psci_version(source_vcpu, kvm) != KVM_ARM_PSCI_0_1)
if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
return PSCI_RET_ALREADY_ON;
else
return PSCI_RET_INVALID_PARAMS;
......@@ -161,7 +161,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
return PSCI_0_2_AFFINITY_LEVEL_OFF;
}
static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags)
{
unsigned long i;
struct kvm_vcpu *tmp;
......@@ -181,17 +181,24 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
vcpu->run->system_event.type = type;
vcpu->run->system_event.flags = flags;
vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
}
static void kvm_psci_system_off(struct kvm_vcpu *vcpu)
{
kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN);
kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN, 0);
}
static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
{
kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET, 0);
}
static void kvm_psci_system_reset2(struct kvm_vcpu *vcpu)
{
kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET,
KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2);
}
static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu)
......@@ -304,24 +311,27 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
return ret;
}
static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu)
static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
{
u32 psci_fn = smccc_get_function(vcpu);
u32 feature;
u32 arg;
unsigned long val;
int ret = 1;
if (minor > 1)
return -EINVAL;
switch(psci_fn) {
case PSCI_0_2_FN_PSCI_VERSION:
val = KVM_ARM_PSCI_1_0;
val = minor == 0 ? KVM_ARM_PSCI_1_0 : KVM_ARM_PSCI_1_1;
break;
case PSCI_1_0_FN_PSCI_FEATURES:
feature = smccc_get_arg1(vcpu);
val = kvm_psci_check_allowed_function(vcpu, feature);
arg = smccc_get_arg1(vcpu);
val = kvm_psci_check_allowed_function(vcpu, arg);
if (val)
break;
switch(feature) {
switch(arg) {
case PSCI_0_2_FN_PSCI_VERSION:
case PSCI_0_2_FN_CPU_SUSPEND:
case PSCI_0_2_FN64_CPU_SUSPEND:
......@@ -337,11 +347,36 @@ static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu)
case ARM_SMCCC_VERSION_FUNC_ID:
val = 0;
break;
case PSCI_1_1_FN_SYSTEM_RESET2:
case PSCI_1_1_FN64_SYSTEM_RESET2:
if (minor >= 1) {
val = 0;
break;
}
fallthrough;
default:
val = PSCI_RET_NOT_SUPPORTED;
break;
}
break;
case PSCI_1_1_FN_SYSTEM_RESET2:
kvm_psci_narrow_to_32bit(vcpu);
fallthrough;
case PSCI_1_1_FN64_SYSTEM_RESET2:
if (minor >= 1) {
arg = smccc_get_arg1(vcpu);
if (arg <= PSCI_1_1_RESET_TYPE_SYSTEM_WARM_RESET ||
arg >= PSCI_1_1_RESET_TYPE_VENDOR_START) {
kvm_psci_system_reset2(vcpu);
vcpu_set_reg(vcpu, 0, PSCI_RET_INTERNAL_FAILURE);
return 0;
}
val = PSCI_RET_INVALID_PARAMS;
break;
}
fallthrough;
default:
return kvm_psci_0_2_call(vcpu);
}
......@@ -391,16 +426,18 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
*/
int kvm_psci_call(struct kvm_vcpu *vcpu)
{
switch (kvm_psci_version(vcpu, vcpu->kvm)) {
switch (kvm_psci_version(vcpu)) {
case KVM_ARM_PSCI_1_1:
return kvm_psci_1_x_call(vcpu, 1);
case KVM_ARM_PSCI_1_0:
return kvm_psci_1_0_call(vcpu);
return kvm_psci_1_x_call(vcpu, 0);
case KVM_ARM_PSCI_0_2:
return kvm_psci_0_2_call(vcpu);
case KVM_ARM_PSCI_0_1:
return kvm_psci_0_1_call(vcpu);
default:
return -EINVAL;
};
}
}
int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
......@@ -470,7 +507,7 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
switch (reg->id) {
case KVM_REG_ARM_PSCI_VERSION:
val = kvm_psci_version(vcpu, vcpu->kvm);
val = kvm_psci_version(vcpu);
break;
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
......@@ -510,6 +547,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
return 0;
case KVM_ARM_PSCI_0_2:
case KVM_ARM_PSCI_1_0:
case KVM_ARM_PSCI_1_1:
if (!wants_02)
return -EINVAL;
vcpu->kvm->arch.psci_version = val;
......
......@@ -44,6 +44,10 @@
* 64bit interface.
*/
static int reg_from_user(u64 *val, const void __user *uaddr, u64 id);
static int reg_to_user(void __user *uaddr, const u64 *val, u64 id);
static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
static bool read_from_write_only(struct kvm_vcpu *vcpu,
struct sys_reg_params *params,
const struct sys_reg_desc *r)
......@@ -287,16 +291,55 @@ static bool trap_loregion(struct kvm_vcpu *vcpu,
return trap_raz_wi(vcpu, p, r);
}
/*
 * Trap handler for OSLAR_EL1.
 *
 * A read is handed to read_from_write_only(). A write mirrors the OSLK bit
 * of the written value into the vcpu's OSLSR_EL1 copy, leaving every other
 * OSLSR_EL1 bit as it was, and returns true.
 */
static bool trap_oslar_el1(struct kvm_vcpu *vcpu,
			   struct sys_reg_params *p,
			   const struct sys_reg_desc *r)
{
	u64 status;

	if (!p->is_write)
		return read_from_write_only(vcpu, p, r);

	status = __vcpu_sys_reg(vcpu, OSLSR_EL1);
	if (p->regval & SYS_OSLAR_OSLK)
		status |= SYS_OSLSR_OSLK;
	else
		status &= ~SYS_OSLSR_OSLK;
	__vcpu_sys_reg(vcpu, OSLSR_EL1) = status;

	return true;
}
static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (p->is_write) {
return ignore_write(vcpu, p);
} else {
p->regval = (1 << 3);
if (p->is_write)
return write_to_read_only(vcpu, p, r);
p->regval = __vcpu_sys_reg(vcpu, r->reg);
return true;
}
}
/*
 * set_user accessor for OSLSR_EL1.
 *
 * Fetches the new value with reg_from_user() and stores it through
 * __vcpu_sys_reg().
 *
 * Returns the reg_from_user() error if the fetch fails, -EINVAL if the new
 * value differs from the descriptor's value (rd->val) in any bit other
 * than OSLK, and 0 otherwise.
 */
static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
			 const struct kvm_one_reg *reg, void __user *uaddr)
{
	u64 new_val;
	int ret;

	ret = reg_from_user(&new_val, uaddr, sys_reg_to_index(rd));
	if (ret)
		return ret;

	/* OSLK is the only bit allowed to deviate from the descriptor value. */
	if ((new_val & ~SYS_OSLSR_OSLK) != (rd->val & ~SYS_OSLSR_OSLK))
		return -EINVAL;

	__vcpu_sys_reg(vcpu, rd->reg) = new_val;
	return 0;
}
static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
......@@ -1164,10 +1207,6 @@ static bool access_raz_id_reg(struct kvm_vcpu *vcpu,
return __access_id_reg(vcpu, p, r, true);
}
static int reg_from_user(u64 *val, const void __user *uaddr, u64 id);
static int reg_to_user(void __user *uaddr, const u64 *val, u64 id);
static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
/* Visibility overrides for SVE-specific control registers */
static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
......@@ -1418,9 +1457,9 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
* Debug handling: We do trap most, if not all debug related system
* registers. The implementation is good enough to ensure that a guest
* can use these with minimal performance degradation. The drawback is
* that we don't implement any of the external debug, none of the
* OSlock protocol. This should be revisited if we ever encounter a
* more demanding guest...
* that we don't implement any of the external debug architecture.
* This should be revisited if we ever encounter a more demanding
* guest...
*/
static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_DC_ISW), access_dcsw },
......@@ -1447,8 +1486,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
DBG_BCR_BVR_WCR_WVR_EL1(15),
{ SYS_DESC(SYS_MDRAR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_OSLAR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_OSLSR_EL1), trap_oslsr_el1 },
{ SYS_DESC(SYS_OSLAR_EL1), trap_oslar_el1 },
{ SYS_DESC(SYS_OSLSR_EL1), trap_oslsr_el1, reset_val, OSLSR_EL1,
SYS_OSLSR_OSLM_IMPLEMENTED, .set_user = set_oslsr_el1, },
{ SYS_DESC(SYS_OSDLR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_DBGPRCR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_DBGCLAIMSET_EL1), trap_raz_wi },
......@@ -1920,10 +1960,10 @@ static const struct sys_reg_desc cp14_regs[] = {
DBGBXVR(0),
/* DBGOSLAR */
{ Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_raz_wi },
{ Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_oslar_el1 },
DBGBXVR(1),
/* DBGOSLSR */
{ Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1 },
{ Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1, NULL, OSLSR_EL1 },
DBGBXVR(2),
DBGBXVR(3),
/* DBGOSDLR */
......
......@@ -37,7 +37,7 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
* If you need to take multiple locks, always take the upper lock first,
* then the lower ones, e.g. first take the its_lock, then the irq_lock.
* If you are already holding a lock and need to take a higher one, you
* have to drop the lower ranking lock first and re-aquire it after having
* have to drop the lower ranking lock first and re-acquire it after having
* taken the upper one.
*
* When taking more than one ap_list_lock at the same time, always take the
......
// SPDX-License-Identifier: GPL-2.0
/*
* VMID allocator.
*
* Based on Arm64 ASID allocator algorithm.
* Please refer to arch/arm64/mm/context.c for detailed
* comments on the algorithm.
*
* Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved.
* Copyright (C) 2012 ARM Ltd.
*/
#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
/* Width of a VMID in bits; set from kvm_get_vmid_bits() in kvm_arm_vmid_alloc_init(). */
unsigned int kvm_arm_vmid_bits;
/* Held by kvm_arm_vmid_update() around the generation re-check and new_vmid(). */
static DEFINE_RAW_SPINLOCK(cpu_vmid_lock);
/* Current generation; advanced by VMID_FIRST_VERSION in new_vmid() when the map is exhausted. */
static atomic64_t vmid_generation;
/* Bitmap of NUM_USER_VMIDS bits, one per VMID index, set while that index is in use. */
static unsigned long *vmid_map;
/*
* Per-CPU active VMID: written by kvm_arm_vmid_update(), set to
* VMID_ACTIVE_INVALID by kvm_arm_vmid_clear_active() and zeroed by
* flush_context().
*/
static DEFINE_PER_CPU(atomic64_t, active_vmids);
/* Per-CPU VMID recorded by flush_context() so that it survives a rollover. */
static DEFINE_PER_CPU(u64, reserved_vmids);
/* All bits above the low kvm_arm_vmid_bits bits, i.e. the generation part of a VMID value. */
#define VMID_MASK (~GENMASK(kvm_arm_vmid_bits - 1, 0))
/* First generation value; also the increment applied to vmid_generation. */
#define VMID_FIRST_VERSION (1UL << kvm_arm_vmid_bits)
/* Number of VMID indices tracked in vmid_map. */
#define NUM_USER_VMIDS VMID_FIRST_VERSION
/* Strip the generation bits, leaving the index into vmid_map. */
#define vmid2idx(vmid) ((vmid) & ~VMID_MASK)
#define idx2vmid(idx) vmid2idx(idx)
/*
* VMID index 0 is always reserved, so the allocator never hands out a
* value whose index bits are all zero. The value below (generation bits
* only, index 0) can therefore be treated as invalid. It is stored in
* active_vmids on vCPU schedule out.
*/
#define VMID_ACTIVE_INVALID VMID_FIRST_VERSION
/* True when the generation bits of @vmid equal the current vmid_generation. */
#define vmid_gen_match(vmid) \
(!(((vmid) ^ atomic64_read(&vmid_generation)) >> kvm_arm_vmid_bits))
/*
 * Rebuild vmid_map after the generation has been bumped.
 *
 * The bitmap is cleared, then for every possible CPU active_vmids is
 * zeroed and the VMID that CPU was using is re-marked in the bitmap and
 * saved in reserved_vmids. If active_vmids was already 0, the CPU's
 * existing reserved VMID is kept instead.
 */
static void flush_context(void)
{
	int cpu;

	bitmap_clear(vmid_map, 0, NUM_USER_VMIDS);

	for_each_possible_cpu(cpu) {
		u64 *reserved = &per_cpu(reserved_vmids, cpu);
		u64 in_use = atomic64_xchg_relaxed(&per_cpu(active_vmids, cpu), 0);

		/* Preserve reserved VMID */
		if (!in_use)
			in_use = *reserved;

		__set_bit(vmid2idx(in_use), vmid_map);
		*reserved = in_use;
	}

	/*
	 * Unlike ASID allocator, we expect less frequent rollover in
	 * case of VMIDs. Hence, instead of marking the CPU as
	 * flush_pending and issuing a local context invalidation on
	 * the next context-switch, we broadcast TLB flush + I-cache
	 * invalidation over the inner shareable domain on rollover.
	 */
	kvm_call_hyp(__kvm_flush_vm_context);
}
/*
 * Replace every per-CPU reserved VMID equal to @vmid with @newvmid (the
 * same VMID index carried over into the current generation).
 *
 * All possible CPUs are visited; the walk does not stop at the first
 * match. Returns true if at least one CPU had @vmid reserved.
 */
static bool check_update_reserved_vmid(u64 vmid, u64 newvmid)
{
	bool found = false;
	int cpu;

	for_each_possible_cpu(cpu) {
		u64 *slot = &per_cpu(reserved_vmids, cpu);

		if (*slot != vmid)
			continue;

		*slot = newvmid;
		found = true;
	}

	return found;
}
/*
 * Give @kvm_vmid a VMID belonging to the current generation and return it.
 *
 * Called from kvm_arm_vmid_update() with cpu_vmid_lock held. A non-zero
 * old VMID is re-used (moved to the current generation) when it is either
 * one of the per-CPU reserved VMIDs or its index is still free in
 * vmid_map. Otherwise a free index is searched for, starting from the
 * last index handed out. If none is left, the generation is bumped, the
 * context is flushed and the search restarts from index 1.
 */
static u64 new_vmid(struct kvm_vmid *kvm_vmid)
{
	static u32 cur_idx = 1;
	u64 vmid = atomic64_read(&kvm_vmid->id);
	u64 generation = atomic64_read(&vmid_generation);

	if (vmid != 0) {
		u64 newvmid = generation | (vmid & ~VMID_MASK);

		/* The bitmap is only probed when the VMID is not reserved. */
		if (check_update_reserved_vmid(vmid, newvmid) ||
		    !__test_and_set_bit(vmid2idx(vmid), vmid_map)) {
			atomic64_set(&kvm_vmid->id, newvmid);
			return newvmid;
		}
	}

	vmid = find_next_zero_bit(vmid_map, NUM_USER_VMIDS, cur_idx);
	if (vmid == NUM_USER_VMIDS) {
		/* We're out of VMIDs, so increment the global generation count */
		generation = atomic64_add_return_relaxed(VMID_FIRST_VERSION,
							 &vmid_generation);
		flush_context();

		/* We have more VMIDs than CPUs, so this will always succeed */
		vmid = find_next_zero_bit(vmid_map, NUM_USER_VMIDS, 1);
	}

	__set_bit(vmid, vmid_map);
	cur_idx = vmid;
	vmid = idx2vmid(vmid) | generation;
	atomic64_set(&kvm_vmid->id, vmid);

	return vmid;
}
/* Called from vCPU sched out with preemption disabled */
void kvm_arm_vmid_clear_active(void)
{
atomic64_set(this_cpu_ptr(&active_vmids), VMID_ACTIVE_INVALID);
}
/*
* Make sure @kvm_vmid holds a VMID of the current generation and publish
* it in this CPU's active_vmids.
*
* Fast path: if this CPU's active_vmids is non-zero, the VMID matches the
* current generation and the cmpxchg on active_vmids returns non-zero,
* return without taking cpu_vmid_lock.
*
* Slow path: under cpu_vmid_lock, re-read the VMID, call new_vmid() if its
* generation is stale, and store the result in active_vmids.
*/
void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
{
unsigned long flags;
u64 vmid, old_active_vmid;
vmid = atomic64_read(&kvm_vmid->id);
/*
* Please refer to the comments in check_and_switch_context() in
* arch/arm64/mm/context.c.
*
* Unlike the ASID allocator, we set the active_vmids to
* VMID_ACTIVE_INVALID on vCPU schedule out to avoid
* reserving the VMID space needlessly on rollover.
* Hence explicitly check here for a "!= 0" to
* handle the sync with a concurrent rollover.
*/
old_active_vmid = atomic64_read(this_cpu_ptr(&active_vmids));
if (old_active_vmid != 0 && vmid_gen_match(vmid) &&
0 != atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_vmids),
old_active_vmid, vmid))
return;
raw_spin_lock_irqsave(&cpu_vmid_lock, flags);
/* Check that our VMID belongs to the current generation. */
vmid = atomic64_read(&kvm_vmid->id);
if (!vmid_gen_match(vmid))
vmid = new_vmid(kvm_vmid);
atomic64_set(this_cpu_ptr(&active_vmids), vmid);
raw_spin_unlock_irqrestore(&cpu_vmid_lock, flags);
}
/*
* Initialize the VMID allocator
*/
int kvm_arm_vmid_alloc_init(void)
{
kvm_arm_vmid_bits = kvm_get_vmid_bits();
/*
* Expect allocation after rollover to fail if we don't have
* at least one more VMID than CPUs. VMID #0 is always reserved.
*/
WARN_ON(NUM_USER_VMIDS - 1 <= num_possible_cpus());
atomic64_set(&vmid_generation, VMID_FIRST_VERSION);
vmid_map = kcalloc(BITS_TO_LONGS(NUM_USER_VMIDS),
sizeof(*vmid_map), GFP_KERNEL);
if (!vmid_map)
return -ENOMEM;
return 0;
}
/*
* Release the vmid_map bitmap allocated by kvm_arm_vmid_alloc_init().
* The vmid_map pointer itself is not reset here.
*/
void kvm_arm_vmid_alloc_free(void)
{
kfree(vmid_map);
}
......@@ -29,6 +29,11 @@ struct kvm_pmu {
struct irq_work overflow_work;
};
/*
* List node wrapping a pointer to one arm_pmu, so that PMUs can be chained
* on a list through the embedded list_head.
*/
struct arm_pmu_entry {
struct list_head entry; /* list linkage */
struct arm_pmu *arm_pmu; /* the PMU this node refers to */
};
DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
static __always_inline bool kvm_arm_support_pmu_v3(void)
......
......@@ -13,14 +13,11 @@
#define KVM_ARM_PSCI_0_1 PSCI_VERSION(0, 1)
#define KVM_ARM_PSCI_0_2 PSCI_VERSION(0, 2)
#define KVM_ARM_PSCI_1_0 PSCI_VERSION(1, 0)
#define KVM_ARM_PSCI_1_1 PSCI_VERSION(1, 1)
#define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_0
#define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_1
/*
* We need the KVM pointer independently from the vcpu as we can call
* this from HYP, and need to apply kern_hyp_va on it...
*/
static inline int kvm_psci_version(struct kvm_vcpu *vcpu, struct kvm *kvm)
static inline int kvm_psci_version(struct kvm_vcpu *vcpu)
{
/*
* Our PSCI implementation stays the same across versions from
......
......@@ -864,7 +864,7 @@ struct perf_event_context {
#define PERF_NR_CONTEXTS 4
/**
* struct perf_event_cpu_context - per cpu event context structure
* struct perf_cpu_context - per cpu event context structure
*/
struct perf_cpu_context {
struct perf_event_context ctx;
......
......@@ -82,6 +82,10 @@
#define PSCI_0_2_TOS_UP_NO_MIGRATE 1
#define PSCI_0_2_TOS_MP 2
/* PSCI v1.1 reset type encoding for SYSTEM_RESET2 */
#define PSCI_1_1_RESET_TYPE_SYSTEM_WARM_RESET 0
#define PSCI_1_1_RESET_TYPE_VENDOR_START 0x80000000U
/* PSCI version decoding (independent of PSCI version) */
#define PSCI_VERSION_MAJOR_SHIFT 16
#define PSCI_VERSION_MINOR_MASK \
......
......@@ -362,6 +362,7 @@ struct kvm_arm_copy_mte_tags {
#define KVM_ARM_VCPU_PMU_V3_IRQ 0
#define KVM_ARM_VCPU_PMU_V3_INIT 1
#define KVM_ARM_VCPU_PMU_V3_FILTER 2
#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
......
......@@ -23,7 +23,7 @@
#define SPSR_D (1 << 9)
#define SPSR_SS (1 << 21)
extern unsigned char sw_bp, hw_bp, bp_svc, bp_brk, hw_wp, ss_start;
extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start;
static volatile uint64_t sw_bp_addr, hw_bp_addr;
static volatile uint64_t wp_addr, wp_data_addr;
static volatile uint64_t svc_addr;
......@@ -47,6 +47,14 @@ static void reset_debug_state(void)
isb();
}
/*
* Take the OS Lock from the guest: write 1 to OSLAR_EL1, synchronize with
* an ISB, then assert that the lock is reported back through OSLSR_EL1.
*/
static void enable_os_lock(void)
{
write_sysreg(1, oslar_el1);
isb();
/* Mask 2 selects bit 1 of OSLSR_EL1, which must read as set once locked. */
GUEST_ASSERT(read_sysreg(oslsr_el1) & 2);
}
static void install_wp(uint64_t addr)
{
uint32_t wcr;
......@@ -99,6 +107,7 @@ static void guest_code(void)
GUEST_SYNC(0);
/* Software-breakpoint */
reset_debug_state();
asm volatile("sw_bp: brk #0");
GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
......@@ -152,6 +161,51 @@ static void guest_code(void)
GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4);
GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8);
GUEST_SYNC(6);
/* OS Lock does not block software-breakpoint */
reset_debug_state();
enable_os_lock();
sw_bp_addr = 0;
asm volatile("sw_bp2: brk #0");
GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2));
GUEST_SYNC(7);
/* OS Lock blocking hardware-breakpoint */
reset_debug_state();
enable_os_lock();
install_hw_bp(PC(hw_bp2));
hw_bp_addr = 0;
asm volatile("hw_bp2: nop");
GUEST_ASSERT_EQ(hw_bp_addr, 0);
GUEST_SYNC(8);
/* OS Lock blocking watchpoint */
reset_debug_state();
enable_os_lock();
write_data = '\0';
wp_data_addr = 0;
install_wp(PC(write_data));
write_data = 'x';
GUEST_ASSERT_EQ(write_data, 'x');
GUEST_ASSERT_EQ(wp_data_addr, 0);
GUEST_SYNC(9);
/* OS Lock blocking single-step */
reset_debug_state();
enable_os_lock();
ss_addr[0] = 0;
install_ss();
ss_idx = 0;
asm volatile("mrs x0, esr_el1\n\t"
"add x0, x0, #1\n\t"
"msr daifset, #8\n\t"
: : : "x0");
GUEST_ASSERT_EQ(ss_addr[0], 0);
GUEST_DONE();
}
......@@ -223,7 +277,7 @@ int main(int argc, char *argv[])
vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
ESR_EC_SVC64, guest_svc_handler);
for (stage = 0; stage < 7; stage++) {
for (stage = 0; stage < 11; stage++) {
vcpu_run(vm, VCPU_ID);
switch (get_ucall(vm, VCPU_ID, &uc)) {
......
......@@ -760,6 +760,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(2, 0, 0, 15, 5),
ARM64_SYS_REG(2, 0, 0, 15, 6),
ARM64_SYS_REG(2, 0, 0, 15, 7),
ARM64_SYS_REG(2, 0, 1, 1, 4), /* OSLSR_EL1 */
ARM64_SYS_REG(2, 4, 0, 7, 0), /* DBGVCR32_EL2 */
ARM64_SYS_REG(3, 0, 0, 0, 5), /* MPIDR_EL1 */
ARM64_SYS_REG(3, 0, 0, 1, 0), /* ID_PFR0_EL1 */
......
......@@ -306,7 +306,8 @@ static void guest_restore_active(struct test_args *args,
uint32_t prio, intid, ap1r;
int i;
/* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
/*
* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
* in descending order, so intid+1 can preempt intid.
*/
for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) {
......@@ -315,7 +316,8 @@ static void guest_restore_active(struct test_args *args,
gic_set_priority(intid, prio);
}
/* In a real migration, KVM would restore all GIC state before running
/*
* In a real migration, KVM would restore all GIC state before running
* guest code.
*/
for (i = 0; i < num; i++) {
......@@ -472,10 +474,10 @@ static void test_restore_active(struct test_args *args, struct kvm_inject_desc *
guest_restore_active(args, MIN_SPI, 4, f->cmd);
}
static void guest_code(struct test_args args)
static void guest_code(struct test_args *args)
{
uint32_t i, nr_irqs = args.nr_irqs;
bool level_sensitive = args.level_sensitive;
uint32_t i, nr_irqs = args->nr_irqs;
bool level_sensitive = args->level_sensitive;
struct kvm_inject_desc *f, *inject_fns;
gic_init(GIC_V3, 1, dist, redist);
......@@ -484,11 +486,11 @@ static void guest_code(struct test_args args)
gic_irq_enable(i);
for (i = MIN_SPI; i < nr_irqs; i++)
gic_irq_set_config(i, !args.level_sensitive);
gic_irq_set_config(i, !level_sensitive);
gic_set_eoi_split(args.eoi_split);
gic_set_eoi_split(args->eoi_split);
reset_priorities(&args);
reset_priorities(args);
gic_set_priority_mask(CPU_PRIO_MASK);
inject_fns = level_sensitive ? inject_level_fns
......@@ -497,17 +499,18 @@ static void guest_code(struct test_args args)
local_irq_enable();
/* Start the tests. */
for_each_supported_inject_fn(&args, inject_fns, f) {
test_injection(&args, f);
test_preemption(&args, f);
test_injection_failure(&args, f);
for_each_supported_inject_fn(args, inject_fns, f) {
test_injection(args, f);
test_preemption(args, f);
test_injection_failure(args, f);
}
/* Restore the active state of IRQs. This would happen when live
/*
* Restore the active state of IRQs. This would happen when live
* migrating IRQs in the middle of being handled.
*/
for_each_supported_activate_fn(&args, set_active_fns, f)
test_restore_active(&args, f);
for_each_supported_activate_fn(args, set_active_fns, f)
test_restore_active(args, f);
GUEST_DONE();
}
......@@ -573,8 +576,8 @@ static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm,
kvm_gsi_routing_write(vm, routing);
} else {
ret = _kvm_gsi_routing_write(vm, routing);
/* The kernel only checks for KVM_IRQCHIP_NUM_PINS. */
if (intid >= KVM_IRQCHIP_NUM_PINS)
/* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */
if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS)
TEST_ASSERT(ret != 0 && errno == EINVAL,
"Bad intid %u did not cause KVM_SET_GSI_ROUTING "
"error: rc: %i errno: %i", intid, ret, errno);
......@@ -739,6 +742,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
int gic_fd;
struct kvm_vm *vm;
struct kvm_inject_args inject_args;
vm_vaddr_t args_gva;
struct test_args args = {
.nr_irqs = nr_irqs,
......@@ -757,7 +761,9 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
vcpu_init_descriptor_tables(vm, VCPU_ID);
/* Setup the guest args page (so it gets the args). */
vcpu_args_set(vm, 0, 1, args);
args_gva = vm_vaddr_alloc_page(vm);
memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
vcpu_args_set(vm, 0, 1, args_gva);
gic_fd = vgic_v3_setup(vm, 1, nr_irqs,
GICD_BASE_GPA, GICR_BASE_GPA);
......@@ -841,7 +847,8 @@ int main(int argc, char **argv)
}
}
/* If the user just specified nr_irqs and/or gic_version, then run all
/*
* If the user just specified nr_irqs and/or gic_version, then run all
* combinations.
*/
if (default_args) {
......
......@@ -18,6 +18,12 @@
#include "test_util.h"
#include "perf_test_util.h"
#include "guest_modes.h"
#ifdef __aarch64__
#include "aarch64/vgic.h"
#define GICD_BASE_GPA 0x8000000ULL
#define GICR_BASE_GPA 0x80A0000ULL
#endif
/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/
#define TEST_HOST_LOOP_N 2UL
......@@ -200,6 +206,10 @@ static void run_test(enum vm_guest_mode mode, void *arg)
vm_enable_cap(vm, &cap);
}
#ifdef __aarch64__
vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
#endif
/* Start the iterations */
iteration = 0;
host_quit = false;
......
......@@ -105,7 +105,8 @@ static void gicv3_set_eoi_split(bool split)
{
uint32_t val;
/* All other fields are read-only, so no need to read CTLR first. In
/*
* All other fields are read-only, so no need to read CTLR first. In
* fact, the kernel does the same.
*/
val = split ? (1U << 1) : 0;
......@@ -159,9 +160,10 @@ static void gicv3_access_reg(uint32_t intid, uint64_t offset,
uint32_t cpu_or_dist;
GUEST_ASSERT(bits_per_field <= reg_bits);
GUEST_ASSERT(*val < (1U << bits_per_field));
/* Some registers like IROUTER are 64 bit long. Those are currently not
* supported by readl nor writel, so just asserting here until then.
GUEST_ASSERT(!write || *val < (1U << bits_per_field));
/*
* This function does not support 64 bit accesses. Just asserting here
* until we implement readq/writeq.
*/
GUEST_ASSERT(reg_bits == 32);
......
......@@ -140,9 +140,6 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid,
uint64_t val;
bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid);
/* Check that the addr part of the attr is within 32 bits. */
assert(attr <= KVM_DEV_ARM_VGIC_OFFSET_MASK);
uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
: KVM_DEV_ARM_VGIC_GRP_DIST_REGS;
......@@ -152,7 +149,11 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid,
attr += SZ_64K;
}
/* All calls will succeed, even with invalid intid's, as long as the
/* Check that the addr part of the attr is within 32 bits. */
assert((attr & ~KVM_DEV_ARM_VGIC_OFFSET_MASK) == 0);
/*
* All calls will succeed, even with invalid intid's, as long as the
* addr part of the attr is within 32 bits (checked above). An invalid
* intid will just make the read/writes point to above the intended
* register space (i.e., ICPENDR after ISPENDR).
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment