Commit 519f526d authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more kvm updates from Paolo Bonzini:
 "ARM:
   - Full debug support for arm64
   - Active state switching for timer interrupts
   - Lazy FP/SIMD save/restore for arm64
   - Generic ARMv8 target

  PPC:
   - Book3S: A few bug fixes
   - Book3S: Allow micro-threading on POWER8

  x86:
   - Compiler warnings

  Generic:
   - Adaptive polling for guest halt"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (49 commits)
  kvm: irqchip: fix memory leak
  kvm: move new trace event outside #ifdef CONFIG_KVM_ASYNC_PF
  KVM: trace kvm_halt_poll_ns grow/shrink
  KVM: dynamic halt-polling
  KVM: make halt_poll_ns per-vCPU
  Silence compiler warning in arch/x86/kvm/emulate.c
  kvm: compile process_smi_save_seg_64() only for x86_64
  KVM: x86: avoid uninitialized variable warning
  KVM: PPC: Book3S: Fix typo in top comment about locking
  KVM: PPC: Book3S: Fix size of the PSPB register
  KVM: PPC: Book3S HV: Exit on H_DOORBELL if HOST_IPI is set
  KVM: PPC: Book3S HV: Fix race in starting secondary threads
  KVM: PPC: Book3S: correct width in XER handling
  KVM: PPC: Book3S HV: Fix preempted vcore stolen time calculation
  KVM: PPC: Book3S HV: Fix preempted vcore list locking
  KVM: PPC: Book3S HV: Implement H_CLEAR_REF and H_CLEAR_MOD
  KVM: PPC: Book3S HV: Fix bug in dirty page tracking
  KVM: PPC: Book3S HV: Fix race in reading change bit when removing HPTE
  KVM: PPC: Book3S HV: Implement dynamic micro-threading on POWER8
  KVM: PPC: Book3S HV: Make use of unused threads when running guests
  ...
parents 06ab838c ba60c41a
......@@ -2671,7 +2671,7 @@ handled.
4.87 KVM_SET_GUEST_DEBUG
Capability: KVM_CAP_SET_GUEST_DEBUG
Architectures: x86, s390, ppc
Architectures: x86, s390, ppc, arm64
Type: vcpu ioctl
Parameters: struct kvm_guest_debug (in)
Returns: 0 on success; -1 on error
......@@ -2693,8 +2693,8 @@ when running. Common control bits are:
The top 16 bits of the control field are architecture specific control
flags which can include the following:
- KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86]
- KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390]
- KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86, arm64]
- KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390, arm64]
- KVM_GUESTDBG_INJECT_DB: inject DB type exception [x86]
- KVM_GUESTDBG_INJECT_BP: inject BP type exception [x86]
- KVM_GUESTDBG_EXIT_PENDING: trigger an immediate guest exit [s390]
......@@ -2709,6 +2709,11 @@ updated to the correct (supplied) values.
The second part of the structure is architecture specific and
typically contains a set of debug registers.
For arm64 the number of debug registers is implementation defined and
can be determined by querying the KVM_CAP_GUEST_DEBUG_HW_BPS and
KVM_CAP_GUEST_DEBUG_HW_WPS capabilities which return a positive number
indicating the number of supported registers.
When debug events exit the main run loop with the reason
KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
structure containing architecture specific debug information.
......@@ -3111,11 +3116,13 @@ data_offset describes where the data is located (KVM_EXIT_IO_OUT) or
where kvm expects application code to place the data for the next
KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a packed array.
/* KVM_EXIT_DEBUG */
struct {
struct kvm_debug_exit_arch arch;
} debug;
Unused.
If the exit_reason is KVM_EXIT_DEBUG, then a vcpu is processing a debug event
for which architecture specific information is returned.
/* KVM_EXIT_MMIO */
struct {
......
......@@ -231,4 +231,9 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arm_init_debug(void) {}
static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
#endif /* __ARM_KVM_HOST_H__ */
......@@ -125,6 +125,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
goto out_free_stage2_pgd;
kvm_vgic_early_init(kvm);
kvm_timer_init(kvm);
/* Mark the initial VMID generation invalid */
......@@ -249,6 +250,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
kvm_vgic_vcpu_early_init(vcpu);
}
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
......@@ -278,6 +280,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
/* Set up the timer */
kvm_timer_vcpu_init(vcpu);
kvm_arm_reset_debug_ptr(vcpu);
return 0;
}
......@@ -301,13 +305,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_arm_set_running_vcpu(NULL);
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
return -EINVAL;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
......@@ -528,10 +525,20 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (vcpu->arch.pause)
vcpu_pause(vcpu);
kvm_vgic_flush_hwstate(vcpu);
/*
* Disarming the background timer must be done in a
* preemptible context, as this call may sleep.
*/
kvm_timer_flush_hwstate(vcpu);
/*
* Preparing the interrupts to be injected also
* involves poking the GIC, which must be done in a
* non-preemptible context.
*/
preempt_disable();
kvm_vgic_flush_hwstate(vcpu);
local_irq_disable();
/*
......@@ -544,12 +551,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
local_irq_enable();
kvm_vgic_sync_hwstate(vcpu);
preempt_enable();
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
continue;
}
kvm_arm_setup_debug(vcpu);
/**************************************************************
* Enter the guest
*/
......@@ -564,6 +573,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
* Back from guest
*************************************************************/
kvm_arm_clear_debug(vcpu);
/*
* We may have taken a host interrupt in HYP mode (ie
* while executing the guest). This interrupt is still
......@@ -586,11 +597,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
*/
kvm_guest_exit();
trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
preempt_enable();
kvm_vgic_sync_hwstate(vcpu);
preempt_enable();
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
ret = handle_exit(vcpu, run, ret);
}
......@@ -921,6 +933,8 @@ static void cpu_init_hyp_mode(void *dummy)
vector_ptr = (unsigned long)__kvm_hyp_vector;
__cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
kvm_arm_init_debug();
}
static int hyp_init_cpu_notify(struct notifier_block *self,
......
......@@ -290,3 +290,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
{
return -EINVAL;
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
return -EINVAL;
}
......@@ -361,10 +361,6 @@ hyp_hvc:
@ Check syndrome register
mrc p15, 4, r1, c5, c2, 0 @ HSR
lsr r0, r1, #HSR_EC_SHIFT
#ifdef CONFIG_VFPv3
cmp r0, #HSR_EC_CP_0_13
beq switch_to_guest_vfp
#endif
cmp r0, #HSR_EC_HVC
bne guest_trap @ Not HVC instr.
......@@ -378,7 +374,10 @@ hyp_hvc:
cmp r2, #0
bne guest_trap @ Guest called HVC
host_switch_to_hyp:
/*
* Getting here means host called HVC, we shift parameters and branch
* to Hyp function.
*/
pop {r0, r1, r2}
/* Check for __hyp_get_vectors */
......@@ -409,6 +408,10 @@ guest_trap:
@ Check if we need the fault information
lsr r1, r1, #HSR_EC_SHIFT
#ifdef CONFIG_VFPv3
cmp r1, #HSR_EC_CP_0_13
beq switch_to_guest_vfp
#endif
cmp r1, #HSR_EC_IABT
mrceq p15, 4, r2, c6, c0, 2 @ HIFAR
beq 2f
......@@ -477,7 +480,6 @@ guest_trap:
*/
#ifdef CONFIG_VFPv3
switch_to_guest_vfp:
load_vcpu @ Load VCPU pointer to r0
push {r3-r7}
@ NEON/VFP used. Turn on VFP access.
......
......@@ -77,7 +77,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
kvm_reset_coprocs(vcpu);
/* Reset arch_timer context */
kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
return 0;
return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
}
......@@ -16,6 +16,8 @@
#ifndef __ASM_HW_BREAKPOINT_H
#define __ASM_HW_BREAKPOINT_H
#include <asm/cputype.h>
#ifdef __KERNEL__
struct arch_hw_breakpoint_ctrl {
......@@ -132,5 +134,17 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task)
extern struct pmu perf_ops_bp;
/* Determine number of BRP registers available. */
static inline int get_num_brps(void)
{
return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
}
/* Determine number of WRP registers available. */
static inline int get_num_wrps(void)
{
return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
}
#endif /* __KERNEL__ */
#endif /* __ASM_BREAKPOINT_H */
......@@ -171,10 +171,13 @@
#define HSTR_EL2_TTEE (1 << 16)
#define HSTR_EL2_T(x) (1 << x)
/* Hyp Coproccessor Trap Register Shifts */
#define CPTR_EL2_TFP_SHIFT 10
/* Hyp Coprocessor Trap Register */
#define CPTR_EL2_TCPAC (1 << 31)
#define CPTR_EL2_TTA (1 << 20)
#define CPTR_EL2_TFP (1 << 10)
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
/* Hyp Debug Configuration Register bits */
#define MDCR_EL2_TDRA (1 << 11)
......
......@@ -46,24 +46,16 @@
#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */
#define PAR_EL1 21 /* Physical Address Register */
#define MDSCR_EL1 22 /* Monitor Debug System Control Register */
#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */
#define DBGBCR15_EL1 38
#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */
#define DBGBVR15_EL1 54
#define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */
#define DBGWCR15_EL1 70
#define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */
#define DBGWVR15_EL1 86
#define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */
#define MDCCINT_EL1 23 /* Monitor Debug Comms Channel Interrupt Enable Reg */
/* 32bit specific registers. Keep them at the end of the range */
#define DACR32_EL2 88 /* Domain Access Control Register */
#define IFSR32_EL2 89 /* Instruction Fault Status Register */
#define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */
#define DBGVCR32_EL2 91 /* Debug Vector Catch Register */
#define TEECR32_EL1 92 /* ThumbEE Configuration Register */
#define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */
#define NR_SYS_REGS 94
#define DACR32_EL2 24 /* Domain Access Control Register */
#define IFSR32_EL2 25 /* Instruction Fault Status Register */
#define FPEXC32_EL2 26 /* Floating-Point Exception Control Register */
#define DBGVCR32_EL2 27 /* Debug Vector Catch Register */
#define TEECR32_EL1 28 /* ThumbEE Configuration Register */
#define TEEHBR32_EL1 29 /* ThumbEE Handler Base Register */
#define NR_SYS_REGS 30
/* 32bit mapping */
#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
......@@ -132,6 +124,8 @@ extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
extern u64 __vgic_v3_get_ich_vtr_el2(void);
extern u32 __kvm_get_mdcr_el2(void);
#endif
#endif /* __ARM_KVM_ASM_H__ */
......@@ -103,15 +103,34 @@ struct kvm_vcpu_arch {
/* HYP configuration */
u64 hcr_el2;
u32 mdcr_el2;
/* Exception Information */
struct kvm_vcpu_fault_info fault;
/* Debug state */
/* Guest debug state */
u64 debug_flags;
/*
* We maintain more than a single set of debug registers to support
* debugging the guest from the host and to maintain separate host and
* guest state during world switches. vcpu_debug_state are the debug
* registers of the vcpu as the guest sees them. host_debug_state are
* the host registers which are saved and restored during
* world switches. external_debug_state contains the debug
* values we want to debug the guest. This is set via the
* KVM_SET_GUEST_DEBUG ioctl.
*
* debug_ptr points to the set of debug registers that should be loaded
* onto the hardware when running the guest.
*/
struct kvm_guest_debug_arch *debug_ptr;
struct kvm_guest_debug_arch vcpu_debug_state;
struct kvm_guest_debug_arch external_debug_state;
/* Pointer to host CPU context */
kvm_cpu_context_t *host_cpu_context;
struct kvm_guest_debug_arch host_debug_state;
/* VGIC state */
struct vgic_cpu vgic_cpu;
......@@ -122,6 +141,17 @@ struct kvm_vcpu_arch {
* here.
*/
/*
* Guest registers we preserve during guest debugging.
*
* These shadow registers are updated by the kvm_handle_sys_reg
* trap handler if the guest accesses or updates them while we
* are using guest debug.
*/
struct {
u32 mdscr_el1;
} guest_debug_preserved;
/* Don't run the guest */
bool pause;
......@@ -216,15 +246,15 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
hyp_stack_ptr, vector_ptr);
}
struct vgic_sr_vectors {
void *save_vgic;
void *restore_vgic;
};
static inline void kvm_arch_hardware_disable(void) {}
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
void kvm_arm_init_debug(void);
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
#endif /* __ARM64_KVM_HOST_H__ */
......@@ -53,14 +53,20 @@ struct kvm_regs {
struct user_fpsimd_state fp_regs;
};
/* Supported Processor Types */
/*
* Supported CPU Targets - Adding a new target type is not recommended,
* unless there are some special registers not supported by the
* genericv8 syreg table.
*/
#define KVM_ARM_TARGET_AEM_V8 0
#define KVM_ARM_TARGET_FOUNDATION_V8 1
#define KVM_ARM_TARGET_CORTEX_A57 2
#define KVM_ARM_TARGET_XGENE_POTENZA 3
#define KVM_ARM_TARGET_CORTEX_A53 4
/* Generic ARM v8 target */
#define KVM_ARM_TARGET_GENERIC_V8 5
#define KVM_ARM_NUM_TARGETS 5
#define KVM_ARM_NUM_TARGETS 6
/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
#define KVM_ARM_DEVICE_TYPE_SHIFT 0
......@@ -100,12 +106,39 @@ struct kvm_sregs {
struct kvm_fpu {
};
/*
* See v8 ARM ARM D7.3: Debug Registers
*
* The architectural limit is 16 debug registers of each type although
* in practice there are usually less (see ID_AA64DFR0_EL1).
*
* Although the control registers are architecturally defined as 32
* bits wide we use a 64 bit structure here to keep parity with
* KVM_GET/SET_ONE_REG behaviour which treats all system registers as
* 64 bit values. It also allows for the possibility of the
* architecture expanding the control registers without having to
* change the userspace ABI.
*/
#define KVM_ARM_MAX_DBG_REGS 16
struct kvm_guest_debug_arch {
__u64 dbg_bcr[KVM_ARM_MAX_DBG_REGS];
__u64 dbg_bvr[KVM_ARM_MAX_DBG_REGS];
__u64 dbg_wcr[KVM_ARM_MAX_DBG_REGS];
__u64 dbg_wvr[KVM_ARM_MAX_DBG_REGS];
};
struct kvm_debug_exit_arch {
__u32 hsr;
__u64 far; /* used for watchpoints */
};
/*
* Architecture specific defines for kvm_guest_debug->control
*/
#define KVM_GUESTDBG_USE_SW_BP (1 << 16)
#define KVM_GUESTDBG_USE_HW (1 << 17)
struct kvm_sync_regs {
};
......
......@@ -116,17 +116,22 @@ int main(void)
DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags));
DEFINE(VCPU_DEBUG_PTR, offsetof(struct kvm_vcpu, arch.debug_ptr));
DEFINE(DEBUG_BCR, offsetof(struct kvm_guest_debug_arch, dbg_bcr));
DEFINE(DEBUG_BVR, offsetof(struct kvm_guest_debug_arch, dbg_bvr));
DEFINE(DEBUG_WCR, offsetof(struct kvm_guest_debug_arch, dbg_wcr));
DEFINE(DEBUG_WVR, offsetof(struct kvm_guest_debug_arch, dbg_wvr));
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
DEFINE(VCPU_MDCR_EL2, offsetof(struct kvm_vcpu, arch.mdcr_el2));
DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
DEFINE(VCPU_HOST_DEBUG_STATE, offsetof(struct kvm_vcpu, arch.host_debug_state));
DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff));
DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled));
DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu));
DEFINE(VGIC_SAVE_FN, offsetof(struct vgic_sr_vectors, save_vgic));
DEFINE(VGIC_RESTORE_FN, offsetof(struct vgic_sr_vectors, restore_vgic));
DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
......
......@@ -48,18 +48,6 @@ static DEFINE_PER_CPU(int, stepping_kernel_bp);
static int core_num_brps;
static int core_num_wrps;
/* Determine number of BRP registers available. */
static int get_num_brps(void)
{
return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
}
/* Determine number of WRP registers available. */
static int get_num_wrps(void)
{
return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
}
int hw_breakpoint_slots(int type)
{
/*
......
......@@ -17,7 +17,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o
......
/*
* Debug and Guest Debug support
*
* Copyright (C) 2015 - Linaro Ltd
* Author: Alex Bennée <alex.bennee@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/kvm_host.h>
#include <linux/hw_breakpoint.h>
#include <asm/debug-monitors.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_emulate.h>
#include "trace.h"
/* These are the bits of MDSCR_EL1 we may manipulate */
#define MDSCR_EL1_DEBUG_MASK (DBG_MDSCR_SS | \
DBG_MDSCR_KDE | \
DBG_MDSCR_MDE)
static DEFINE_PER_CPU(u32, mdcr_el2);
/**
* save/restore_guest_debug_regs
*
* For some debug operations we need to tweak some guest registers. As
* a result we need to save the state of those registers before we
* make those modifications.
*
* Guest access to MDSCR_EL1 is trapped by the hypervisor and handled
* after we have restored the preserved value to the main context.
*/
static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
{
vcpu->arch.guest_debug_preserved.mdscr_el1 = vcpu_sys_reg(vcpu, MDSCR_EL1);
trace_kvm_arm_set_dreg32("Saved MDSCR_EL1",
vcpu->arch.guest_debug_preserved.mdscr_el1);
}
static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
{
vcpu_sys_reg(vcpu, MDSCR_EL1) = vcpu->arch.guest_debug_preserved.mdscr_el1;
trace_kvm_arm_set_dreg32("Restored MDSCR_EL1",
vcpu_sys_reg(vcpu, MDSCR_EL1));
}
/**
* kvm_arm_init_debug - grab what we need for debug
*
* Currently the sole task of this function is to retrieve the initial
* value of mdcr_el2 so we can preserve MDCR_EL2.HPMN which has
* presumably been set-up by some knowledgeable bootcode.
*
* It is called once per-cpu during CPU hyp initialisation.
*/
void kvm_arm_init_debug(void)
{
__this_cpu_write(mdcr_el2, kvm_call_hyp(__kvm_get_mdcr_el2));
}
/**
* kvm_arm_reset_debug_ptr - reset the debug ptr to point to the vcpu state
*/
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
{
vcpu->arch.debug_ptr = &vcpu->arch.vcpu_debug_state;
}
/**
* kvm_arm_setup_debug - set up debug related stuff
*
* @vcpu: the vcpu pointer
*
* This is called before each entry into the hypervisor to setup any
* debug related registers. Currently this just ensures we will trap
* access to:
* - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR)
* - Debug ROM Address (MDCR_EL2_TDRA)
* - OS related registers (MDCR_EL2_TDOSA)
*
* Additionally, KVM only traps guest accesses to the debug registers if
* the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
* flag on vcpu->arch.debug_flags). Since the guest must not interfere
* with the hardware state when debugging the guest, we must ensure that
* trapping is enabled whenever we are debugging the guest using the
* debug registers.
*/
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
{
bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY);
trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK;
vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
MDCR_EL2_TPMCR |
MDCR_EL2_TDRA |
MDCR_EL2_TDOSA);
/* Is Guest debugging in effect? */
if (vcpu->guest_debug) {
/* Route all software debug exceptions to EL2 */
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE;
/* Save guest debug state */
save_guest_debug_regs(vcpu);
/*
* Single Step (ARM ARM D2.12.3 The software step state
* machine)
*
* If we are doing Single Step we need to manipulate
* the guest's MDSCR_EL1.SS and PSTATE.SS. Once the
* step has occurred the hypervisor will trap the
* debug exception and we return to userspace.
*
* If the guest attempts to single step its userspace
* we would have to deal with a trapped exception
* while in the guest kernel. Because this would be
* hard to unwind we suppress the guest's ability to
* do so by masking MDSCR_EL.SS.
*
* This confuses guest debuggers which use
* single-step behind the scenes but everything
* returns to normal once the host is no longer
* debugging the system.
*/
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
*vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_SS;
} else {
vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~DBG_MDSCR_SS;
}
trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu));
/*
* HW Breakpoints and watchpoints
*
* We simply switch the debug_ptr to point to our new
* external_debug_state which has been populated by the
* debug ioctl. The existing KVM_ARM64_DEBUG_DIRTY
* mechanism ensures the registers are updated on the
* world switch.
*/
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
/* Enable breakpoints/watchpoints */
vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_MDE;
vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
trap_debug = true;
trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
&vcpu->arch.debug_ptr->dbg_bcr[0],
&vcpu->arch.debug_ptr->dbg_bvr[0]);
trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
&vcpu->arch.debug_ptr->dbg_wcr[0],
&vcpu->arch.debug_ptr->dbg_wvr[0]);
}
}
BUG_ON(!vcpu->guest_debug &&
vcpu->arch.debug_ptr != &vcpu->arch.vcpu_debug_state);
/* Trap debug register access */
if (trap_debug)
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_sys_reg(vcpu, MDSCR_EL1));
}
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
{
trace_kvm_arm_clear_debug(vcpu->guest_debug);
if (vcpu->guest_debug) {
restore_guest_debug_regs(vcpu);
/*
* If we were using HW debug we need to restore the
* debug_ptr to the guest debug state.
*/
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
kvm_arm_reset_debug_ptr(vcpu);
trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
&vcpu->arch.debug_ptr->dbg_bcr[0],
&vcpu->arch.debug_ptr->dbg_bvr[0]);
trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
&vcpu->arch.debug_ptr->dbg_wcr[0],
&vcpu->arch.debug_ptr->dbg_wvr[0]);
}
}
}
......@@ -32,6 +32,8 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
#include "trace.h"
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
......@@ -293,7 +295,8 @@ int __attribute_const__ kvm_target_cpu(void)
break;
};
return -EINVAL;
/* Return a default generic target */
return KVM_ARM_TARGET_GENERIC_V8;
}
int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
......@@ -331,3 +334,41 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
{
return -EINVAL;
}
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
KVM_GUESTDBG_USE_SW_BP | \
KVM_GUESTDBG_USE_HW | \
KVM_GUESTDBG_SINGLESTEP)
/**
* kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging
* @kvm: pointer to the KVM struct
* @kvm_guest_debug: the ioctl data buffer
*
* This sets up and enables the VM for guest debugging. Userspace
* passes in a control flag to enable different debug types and
* potentially other architecture specific information in the rest of
* the structure.
*/
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
trace_kvm_set_guest_debug(vcpu, dbg->control);
if (dbg->control & ~KVM_GUESTDBG_VALID_MASK)
return -EINVAL;
if (dbg->control & KVM_GUESTDBG_ENABLE) {
vcpu->guest_debug = dbg->control;
/* Hardware assisted Break and Watch points */
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
vcpu->arch.external_debug_state = dbg->arch;
}
} else {
/* If not enabled clear all flags */
vcpu->guest_debug = 0;
}
return 0;
}
......@@ -82,6 +82,45 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 1;
}
/**
* kvm_handle_guest_debug - handle a debug exception instruction
*
* @vcpu: the vcpu pointer
* @run: access to the kvm_run structure for results
*
* We route all debug exceptions through the same handler. If both the
* guest and host are using the same debug facilities it will be up to
* userspace to re-inject the correct exception for guest delivery.
*
* @return: 0 (while setting run->exit_reason), -1 for error
*/
static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
u32 hsr = kvm_vcpu_get_hsr(vcpu);
int ret = 0;
run->exit_reason = KVM_EXIT_DEBUG;
run->debug.arch.hsr = hsr;
switch (hsr >> ESR_ELx_EC_SHIFT) {
case ESR_ELx_EC_WATCHPT_LOW:
run->debug.arch.far = vcpu->arch.fault.far_el2;
/* fall through */
case ESR_ELx_EC_SOFTSTP_LOW:
case ESR_ELx_EC_BREAKPT_LOW:
case ESR_ELx_EC_BKPT32:
case ESR_ELx_EC_BRK64:
break;
default:
kvm_err("%s: un-handled case hsr: %#08x\n",
__func__, (unsigned int) hsr);
ret = -1;
break;
}
return ret;
}
static exit_handle_fn arm_exit_handlers[] = {
[ESR_ELx_EC_WFx] = kvm_handle_wfx,
[ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32,
......@@ -96,6 +135,11 @@ static exit_handle_fn arm_exit_handlers[] = {
[ESR_ELx_EC_SYS64] = kvm_handle_sys_reg,
[ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort,
[ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort,
[ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
[ESR_ELx_EC_WATCHPT_LOW]= kvm_handle_guest_debug,
[ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug,
[ESR_ELx_EC_BKPT32] = kvm_handle_guest_debug,
[ESR_ELx_EC_BRK64] = kvm_handle_guest_debug,
};
static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
......
This diff is collapsed.
......@@ -22,6 +22,7 @@
#include <linux/errno.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/hw_breakpoint.h>
#include <kvm/arm_arch_timer.h>
......@@ -56,6 +57,12 @@ static bool cpu_has_32bit_el1(void)
return !!(pfr0 & 0x20);
}
/**
* kvm_arch_dev_ioctl_check_extension
*
* We currently assume that the number of HW registers is uniform
* across all CPUs (see cpuinfo_sanity_check).
*/
int kvm_arch_dev_ioctl_check_extension(long ext)
{
int r;
......@@ -64,6 +71,15 @@ int kvm_arch_dev_ioctl_check_extension(long ext)
case KVM_CAP_ARM_EL1_32BIT:
r = cpu_has_32bit_el1();
break;
case KVM_CAP_GUEST_DEBUG_HW_BPS:
r = get_num_brps();
break;
case KVM_CAP_GUEST_DEBUG_HW_WPS:
r = get_num_wrps();
break;
case KVM_CAP_SET_GUEST_DEBUG:
r = 1;
break;
default:
r = 0;
}
......@@ -105,7 +121,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
kvm_reset_sys_regs(vcpu);
/* Reset timer */
kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
return 0;
return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
}
......@@ -38,6 +38,8 @@
#include "sys_regs.h"
#include "trace.h"
/*
* All of this file is extremly similar to the ARM coproc.c, but the
* types are different. My gut feeling is that it should be pretty
......@@ -208,9 +210,217 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
*vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
}
trace_trap_reg(__func__, r->reg, p->is_write, *vcpu_reg(vcpu, p->Rt));
return true;
}
/*
* reg_to_dbg/dbg_to_reg
*
* A 32 bit write to a debug register leave top bits alone
* A 32 bit read from a debug register only returns the bottom bits
*
* All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the
* hyp.S code switches between host and guest values in future.
*/
static inline void reg_to_dbg(struct kvm_vcpu *vcpu,
const struct sys_reg_params *p,
u64 *dbg_reg)
{
u64 val = *vcpu_reg(vcpu, p->Rt);
if (p->is_32bit) {
val &= 0xffffffffUL;
val |= ((*dbg_reg >> 32) << 32);
}
*dbg_reg = val;
vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
}
static inline void dbg_to_reg(struct kvm_vcpu *vcpu,
const struct sys_reg_params *p,
u64 *dbg_reg)
{
u64 val = *dbg_reg;
if (p->is_32bit)
val &= 0xffffffffUL;
*vcpu_reg(vcpu, p->Rt) = val;
}
static inline bool trap_bvr(struct kvm_vcpu *vcpu,
const struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
if (p->is_write)
reg_to_dbg(vcpu, p, dbg_reg);
else
dbg_to_reg(vcpu, p, dbg_reg);
trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
return true;
}
static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
return 0;
}
static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
return 0;
}
static inline void reset_bvr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val;
}
static inline bool trap_bcr(struct kvm_vcpu *vcpu,
const struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
if (p->is_write)
reg_to_dbg(vcpu, p, dbg_reg);
else
dbg_to_reg(vcpu, p, dbg_reg);
trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
return true;
}
static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
return 0;
}
static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
return 0;
}
static inline void reset_bcr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val;
}
static inline bool trap_wvr(struct kvm_vcpu *vcpu,
const struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
if (p->is_write)
reg_to_dbg(vcpu, p, dbg_reg);
else
dbg_to_reg(vcpu, p, dbg_reg);
trace_trap_reg(__func__, rd->reg, p->is_write,
vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]);
return true;
}
static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
return 0;
}
static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
return 0;
}
static inline void reset_wvr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val;
}
static inline bool trap_wcr(struct kvm_vcpu *vcpu,
const struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
if (p->is_write)
reg_to_dbg(vcpu, p, dbg_reg);
else
dbg_to_reg(vcpu, p, dbg_reg);
trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
return true;
}
static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
return 0;
}
static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
return 0;
}
static inline void reset_wcr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val;
}
static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 amair;
......@@ -240,16 +450,16 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
#define DBG_BCR_BVR_WCR_WVR_EL1(n) \
/* DBGBVRn_EL1 */ \
{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \
trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 }, \
trap_bvr, reset_bvr, n, 0, get_bvr, set_bvr }, \
/* DBGBCRn_EL1 */ \
{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \
trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 }, \
trap_bcr, reset_bcr, n, 0, get_bcr, set_bcr }, \
/* DBGWVRn_EL1 */ \
{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \
trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 }, \
trap_wvr, reset_wvr, n, 0, get_wvr, set_wvr }, \
/* DBGWCRn_EL1 */ \
{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \
trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 }
trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr }
/*
* Architected system registers.
......@@ -516,28 +726,57 @@ static bool trap_debug32(struct kvm_vcpu *vcpu,
return true;
}
#define DBG_BCR_BVR_WCR_WVR(n) \
/* DBGBVRn */ \
{ Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32, \
NULL, (cp14_DBGBVR0 + (n) * 2) }, \
/* DBGBCRn */ \
{ Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32, \
NULL, (cp14_DBGBCR0 + (n) * 2) }, \
/* DBGWVRn */ \
{ Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32, \
NULL, (cp14_DBGWVR0 + (n) * 2) }, \
/* DBGWCRn */ \
{ Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32, \
NULL, (cp14_DBGWCR0 + (n) * 2) }
#define DBGBXVR(n) \
{ Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32, \
NULL, cp14_DBGBXVR0 + n * 2 }
/* AArch32 debug register mappings
*
* AArch32 DBGBVRn is mapped to DBGBVRn_EL1[31:0]
* AArch32 DBGBXVRn is mapped to DBGBVRn_EL1[63:32]
*
* All control registers and watchpoint value registers are mapped to
* the lower 32 bits of their AArch64 equivalents. We share the trap
* handlers with the above AArch64 code which checks what mode the
* system is in.
*/
static inline bool trap_xvr(struct kvm_vcpu *vcpu,
const struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
if (p->is_write) {
u64 val = *dbg_reg;
val &= 0xffffffffUL;
val |= *vcpu_reg(vcpu, p->Rt) << 32;
*dbg_reg = val;
vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
} else {
*vcpu_reg(vcpu, p->Rt) = *dbg_reg >> 32;
}
trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
return true;
}
#define DBG_BCR_BVR_WCR_WVR(n) \
/* DBGBVRn */ \
{ Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \
/* DBGBCRn */ \
{ Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \
/* DBGWVRn */ \
{ Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \
/* DBGWCRn */ \
{ Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n }
#define DBGBXVR(n) \
{ Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_xvr, NULL, n }
/*
* Trapped cp14 registers. We generally ignore most of the external
* debug, on the principle that they don't really make sense to a
* guest. Revisit this one day, whould this principle change.
* guest. Revisit this one day, would this principle change.
*/
static const struct sys_reg_desc cp14_regs[] = {
/* DBGIDR */
......@@ -999,6 +1238,8 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
struct sys_reg_params params;
unsigned long esr = kvm_vcpu_get_hsr(vcpu);
trace_kvm_handle_sys_reg(esr);
params.is_aarch32 = false;
params.is_32bit = false;
params.Op0 = (esr >> 20) & 3;
......@@ -1303,6 +1544,9 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
if (!r)
return get_invariant_sys_reg(reg->id, uaddr);
if (r->get_user)
return (r->get_user)(vcpu, r, reg, uaddr);
return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id);
}
......@@ -1321,6 +1565,9 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
if (!r)
return set_invariant_sys_reg(reg->id, uaddr);
if (r->set_user)
return (r->set_user)(vcpu, r, reg, uaddr);
return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
}
......
......@@ -55,6 +55,12 @@ struct sys_reg_desc {
/* Value (usually reset value) */
u64 val;
/* Custom get/set_user functions, fallback to generic if NULL */
int (*get_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr);
int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr);
};
static inline void print_sys_reg_instr(const struct sys_reg_params *p)
......
......@@ -94,6 +94,8 @@ static int __init sys_reg_genericv8_init(void)
&genericv8_target_table);
kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,
&genericv8_target_table);
kvm_register_target_sys_reg_table(KVM_ARM_TARGET_GENERIC_V8,
&genericv8_target_table);
return 0;
}
......
......@@ -44,6 +44,129 @@ TRACE_EVENT(kvm_hvc_arm64,
__entry->vcpu_pc, __entry->r0, __entry->imm)
);
TRACE_EVENT(kvm_arm_setup_debug,
TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
TP_ARGS(vcpu, guest_debug),
TP_STRUCT__entry(
__field(struct kvm_vcpu *, vcpu)
__field(__u32, guest_debug)
),
TP_fast_assign(
__entry->vcpu = vcpu;
__entry->guest_debug = guest_debug;
),
TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
);
TRACE_EVENT(kvm_arm_clear_debug,
TP_PROTO(__u32 guest_debug),
TP_ARGS(guest_debug),
TP_STRUCT__entry(
__field(__u32, guest_debug)
),
TP_fast_assign(
__entry->guest_debug = guest_debug;
),
TP_printk("flags: 0x%08x", __entry->guest_debug)
);
TRACE_EVENT(kvm_arm_set_dreg32,
TP_PROTO(const char *name, __u32 value),
TP_ARGS(name, value),
TP_STRUCT__entry(
__field(const char *, name)
__field(__u32, value)
),
TP_fast_assign(
__entry->name = name;
__entry->value = value;
),
TP_printk("%s: 0x%08x", __entry->name, __entry->value)
);
TRACE_EVENT(kvm_arm_set_regset,
TP_PROTO(const char *type, int len, __u64 *control, __u64 *value),
TP_ARGS(type, len, control, value),
TP_STRUCT__entry(
__field(const char *, name)
__field(int, len)
__array(u64, ctrls, 16)
__array(u64, values, 16)
),
TP_fast_assign(
__entry->name = type;
__entry->len = len;
memcpy(__entry->ctrls, control, len << 3);
memcpy(__entry->values, value, len << 3);
),
TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name,
__print_array(__entry->ctrls, __entry->len, sizeof(__u64)),
__print_array(__entry->values, __entry->len, sizeof(__u64)))
);
TRACE_EVENT(trap_reg,
TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value),
TP_ARGS(fn, reg, is_write, write_value),
TP_STRUCT__entry(
__field(const char *, fn)
__field(int, reg)
__field(bool, is_write)
__field(u64, write_value)
),
TP_fast_assign(
__entry->fn = fn;
__entry->reg = reg;
__entry->is_write = is_write;
__entry->write_value = write_value;
),
TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
);
TRACE_EVENT(kvm_handle_sys_reg,
TP_PROTO(unsigned long hsr),
TP_ARGS(hsr),
TP_STRUCT__entry(
__field(unsigned long, hsr)
),
TP_fast_assign(
__entry->hsr = hsr;
),
TP_printk("HSR 0x%08lx", __entry->hsr)
);
TRACE_EVENT(kvm_set_guest_debug,
TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
TP_ARGS(vcpu, guest_debug),
TP_STRUCT__entry(
__field(struct kvm_vcpu *, vcpu)
__field(__u32, guest_debug)
),
TP_fast_assign(
__entry->vcpu = vcpu;
__entry->guest_debug = guest_debug;
),
TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
);
#endif /* _TRACE_ARM64_KVM_H */
#undef TRACE_INCLUDE_PATH
......
......@@ -158,6 +158,7 @@ extern pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing,
bool *writable);
extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
unsigned long *rmap, long pte_index, int realmode);
extern void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize);
extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
unsigned long pte_index);
void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
......@@ -225,12 +226,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
return vcpu->arch.cr;
}
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
{
vcpu->arch.xer = val;
}
static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
{
return vcpu->arch.xer;
}
......
......@@ -25,6 +25,12 @@
#define XICS_MFRR 0xc
#define XICS_IPI 2 /* interrupt source # for IPIs */
/* Maximum number of threads per physical core */
#define MAX_SMT_THREADS 8
/* Maximum number of subcores per physical core */
#define MAX_SUBCORES 4
#ifdef __ASSEMBLY__
#ifdef CONFIG_KVM_BOOK3S_HANDLER
......@@ -65,6 +71,19 @@ kvmppc_resume_\intno:
#else /*__ASSEMBLY__ */
struct kvmppc_vcore;
/* Struct used for coordinating micro-threading (split-core) mode changes */
struct kvm_split_mode {
unsigned long rpr;
unsigned long pmmar;
unsigned long ldbar;
u8 subcore_size;
u8 do_nap;
u8 napped[MAX_SMT_THREADS];
struct kvmppc_vcore *master_vcs[MAX_SUBCORES];
};
/*
* This struct goes in the PACA on 64-bit processors. It is used
* to store host state that needs to be saved when we enter a guest
......@@ -100,6 +119,7 @@ struct kvmppc_host_state {
u64 host_spurr;
u64 host_dscr;
u64 dec_expires;
struct kvm_split_mode *kvm_split_mode;
#endif
#ifdef CONFIG_PPC_BOOK3S_64
u64 cfar;
......@@ -112,7 +132,7 @@ struct kvmppc_book3s_shadow_vcpu {
bool in_use;
ulong gpr[14];
u32 cr;
u32 xer;
ulong xer;
ulong ctr;
ulong lr;
ulong pc;
......
......@@ -54,12 +54,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
return vcpu->arch.cr;
}
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
{
vcpu->arch.xer = val;
}
static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
{
return vcpu->arch.xer;
}
......
......@@ -205,8 +205,10 @@ struct revmap_entry {
*/
#define KVMPPC_RMAP_LOCK_BIT 63
#define KVMPPC_RMAP_RC_SHIFT 32
#define KVMPPC_RMAP_CHG_SHIFT 48
#define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_CHANGED (HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_CHG_ORDER (0x3ful << KVMPPC_RMAP_CHG_SHIFT)
#define KVMPPC_RMAP_PRESENT 0x100000000ul
#define KVMPPC_RMAP_INDEX 0xfffffffful
......@@ -278,7 +280,9 @@ struct kvmppc_vcore {
u16 last_cpu;
u8 vcore_state;
u8 in_guest;
struct kvmppc_vcore *master_vcore;
struct list_head runnable_threads;
struct list_head preempt_list;
spinlock_t lock;
wait_queue_head_t wq;
spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
......@@ -300,12 +304,21 @@ struct kvmppc_vcore {
#define VCORE_EXIT_MAP(vc) ((vc)->entry_exit_map >> 8)
#define VCORE_IS_EXITING(vc) (VCORE_EXIT_MAP(vc) != 0)
/* Values for vcore_state */
/* This bit is used when a vcore exit is triggered from outside the vcore */
#define VCORE_EXIT_REQ 0x10000
/*
* Values for vcore_state.
* Note that these are arranged such that lower values
* (< VCORE_SLEEPING) don't require stolen time accounting
* on load/unload, and higher values do.
*/
#define VCORE_INACTIVE 0
#define VCORE_SLEEPING 1
#define VCORE_PREEMPT 2
#define VCORE_RUNNING 3
#define VCORE_EXITING 4
#define VCORE_PREEMPT 1
#define VCORE_PIGGYBACK 2
#define VCORE_SLEEPING 3
#define VCORE_RUNNING 4
#define VCORE_EXITING 5
/*
* Struct used to manage memory for a virtual processor area
......@@ -473,7 +486,7 @@ struct kvm_vcpu_arch {
ulong ciabr;
ulong cfar;
ulong ppr;
ulong pspb;
u32 pspb;
ulong fscr;
ulong shadow_fscr;
ulong ebbhr;
......@@ -619,6 +632,7 @@ struct kvm_vcpu_arch {
int trap;
int state;
int ptid;
int thread_cpu;
bool timer_running;
wait_queue_head_t cpu_run;
......
......@@ -287,7 +287,7 @@
/* POWER8 Micro Partition Prefetch (MPP) parameters */
/* Address mask is common for LOGMPP instruction and MPPR SPR */
#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000
#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000ULL
/* Bits 60 and 61 of MPP SPR should be set to one of the following */
/* Aborting the fetch is indeed setting 00 in the table size bits */
......
......@@ -511,6 +511,8 @@ int main(void)
DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst));
DEFINE(VCPU_CPU, offsetof(struct kvm_vcpu, cpu));
DEFINE(VCPU_THREAD_CPU, offsetof(struct kvm_vcpu, arch.thread_cpu));
#endif
#ifdef CONFIG_PPC_BOOK3S
DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
......@@ -673,7 +675,14 @@ int main(void)
HSTATE_FIELD(HSTATE_DSCR, host_dscr);
HSTATE_FIELD(HSTATE_DABR, dabr);
HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode);
DEFINE(IPI_PRIORITY, IPI_PRIORITY);
DEFINE(KVM_SPLIT_RPR, offsetof(struct kvm_split_mode, rpr));
DEFINE(KVM_SPLIT_PMMAR, offsetof(struct kvm_split_mode, pmmar));
DEFINE(KVM_SPLIT_LDBAR, offsetof(struct kvm_split_mode, ldbar));
DEFINE(KVM_SPLIT_SIZE, offsetof(struct kvm_split_mode, subcore_size));
DEFINE(KVM_SPLIT_DO_NAP, offsetof(struct kvm_split_mode, do_nap));
DEFINE(KVM_SPLIT_NAPPED, offsetof(struct kvm_split_mode, napped));
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_PPC_BOOK3S_64
......
......@@ -74,14 +74,14 @@ config KVM_BOOK3S_64
If unsure, say N.
config KVM_BOOK3S_64_HV
tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
tristate "KVM for POWER7 and later using hypervisor mode in host"
depends on KVM_BOOK3S_64 && PPC_POWERNV
select KVM_BOOK3S_HV_POSSIBLE
select MMU_NOTIFIER
select CMA
---help---
Support running unmodified book3s_64 guest kernels in
virtual machines on POWER7 and PPC970 processors that have
virtual machines on POWER7 and newer processors that have
hypervisor mode available to the host.
If you say Y here, KVM will use the hardware virtualization
......@@ -89,8 +89,8 @@ config KVM_BOOK3S_64_HV
guest operating systems will run at full hardware speed
using supervisor and user modes. However, this also means
that KVM is not usable under PowerVM (pHyp), is only usable
on POWER7 (or later) processors and PPC970-family processors,
and cannot emulate a different processor from the host processor.
on POWER7 or later processors, and cannot emulate a
different processor from the host processor.
If unsure, say N.
......
......@@ -240,7 +240,8 @@ void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags)
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
}
int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
unsigned int priority)
{
int deliver = 1;
int vec = 0;
......
......@@ -26,6 +26,7 @@
#include <asm/machdep.h>
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>
#include "book3s.h"
/* #define DEBUG_MMU */
/* #define DEBUG_SR */
......
......@@ -28,6 +28,7 @@
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>
#include "trace_pr.h"
#include "book3s.h"
#define PTE_SIZE 12
......
......@@ -761,6 +761,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
/* Harvest R and C */
rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
if (rcbits & HPTE_R_C)
kvmppc_update_rmap_change(rmapp, psize);
if (rcbits & ~rev[i].guest_rpte) {
rev[i].guest_rpte = ptel | rcbits;
note_hpte_modification(kvm, &rev[i]);
......@@ -927,8 +929,12 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
retry:
lock_rmap(rmapp);
if (*rmapp & KVMPPC_RMAP_CHANGED) {
*rmapp &= ~KVMPPC_RMAP_CHANGED;
long change_order = (*rmapp & KVMPPC_RMAP_CHG_ORDER)
>> KVMPPC_RMAP_CHG_SHIFT;
*rmapp &= ~(KVMPPC_RMAP_CHANGED | KVMPPC_RMAP_CHG_ORDER);
npages_dirty = 1;
if (change_order > PAGE_SHIFT)
npages_dirty = 1ul << (change_order - PAGE_SHIFT);
}
if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
unlock_rmap(rmapp);
......
......@@ -23,6 +23,7 @@
#include <asm/reg.h>
#include <asm/switch_to.h>
#include <asm/time.h>
#include "book3s.h"
#define OP_19_XOP_RFID 18
#define OP_19_XOP_RFI 50
......
This diff is collapsed.
......@@ -110,14 +110,15 @@ void __init kvm_cma_reserve(void)
long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
unsigned int yield_count)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
int ptid = local_paca->kvm_hstate.ptid;
int threads_running;
int threads_ceded;
int threads_conferring;
u64 stop = get_tb() + 10 * tb_ticks_per_usec;
int rv = H_SUCCESS; /* => don't yield */
set_bit(vcpu->arch.ptid, &vc->conferring_threads);
set_bit(ptid, &vc->conferring_threads);
while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) {
threads_running = VCORE_ENTRY_MAP(vc);
threads_ceded = vc->napping_threads;
......@@ -127,7 +128,7 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
break;
}
}
clear_bit(vcpu->arch.ptid, &vc->conferring_threads);
clear_bit(ptid, &vc->conferring_threads);
return rv;
}
......@@ -238,7 +239,8 @@ void kvmhv_commence_exit(int trap)
{
struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
int ptid = local_paca->kvm_hstate.ptid;
int me, ee;
struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
int me, ee, i;
/* Set our bit in the threads-exiting-guest map in the 0xff00
bits of vcore->entry_exit_map */
......@@ -258,4 +260,26 @@ void kvmhv_commence_exit(int trap)
*/
if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER)
kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid));
/*
* If we are doing dynamic micro-threading, interrupt the other
* subcores to pull them out of their guests too.
*/
if (!sip)
return;
for (i = 0; i < MAX_SUBCORES; ++i) {
vc = sip->master_vcs[i];
if (!vc)
break;
do {
ee = vc->entry_exit_map;
/* Already asked to exit? */
if ((ee >> 8) != 0)
break;
} while (cmpxchg(&vc->entry_exit_map, ee,
ee | VCORE_EXIT_REQ) != ee);
if ((ee >> 8) == 0)
kvmhv_interrupt_vcore(vc, ee);
}
}
......@@ -12,6 +12,7 @@
#include <linux/kvm_host.h>
#include <linux/hugetlb.h>
#include <linux/module.h>
#include <linux/log2.h>
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
......@@ -97,25 +98,52 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
/* Update the changed page order field of an rmap entry */
void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize)
{
unsigned long order;
if (!psize)
return;
order = ilog2(psize);
order <<= KVMPPC_RMAP_CHG_SHIFT;
if (order > (*rmap & KVMPPC_RMAP_CHG_ORDER))
*rmap = (*rmap & ~KVMPPC_RMAP_CHG_ORDER) | order;
}
EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change);
/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
unsigned long hpte_gr)
{
struct kvm_memory_slot *memslot;
unsigned long *rmap;
unsigned long gfn;
gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr));
memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
if (!memslot)
return NULL;
rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
return rmap;
}
/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
struct revmap_entry *rev,
unsigned long hpte_v, unsigned long hpte_r)
{
struct revmap_entry *next, *prev;
unsigned long gfn, ptel, head;
struct kvm_memory_slot *memslot;
unsigned long ptel, head;
unsigned long *rmap;
unsigned long rcbits;
rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
ptel = rev->guest_rpte |= rcbits;
gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
if (!memslot)
rmap = revmap_for_hpte(kvm, hpte_v, ptel);
if (!rmap)
return;
rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
lock_rmap(rmap);
head = *rmap & KVMPPC_RMAP_INDEX;
......@@ -131,6 +159,8 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
}
*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
if (rcbits & HPTE_R_C)
kvmppc_update_rmap_change(rmap, hpte_page_size(hpte_v, hpte_r));
unlock_rmap(rmap);
}
......@@ -421,14 +451,20 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
v = pte & ~HPTE_V_HVLOCK;
if (v & HPTE_V_VALID) {
u64 pte1;
pte1 = be64_to_cpu(hpte[1]);
hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
rb = compute_tlbie_rb(v, pte1, pte_index);
rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index);
do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
/* Read PTE low word after tlbie to get final R/C values */
remove_revmap_chain(kvm, pte_index, rev, v, pte1);
/*
* The reference (R) and change (C) bits in a HPT
* entry can be set by hardware at any time up until
* the HPTE is invalidated and the TLB invalidation
* sequence has completed. This means that when
* removing a HPTE, we need to re-read the HPTE after
* the invalidation sequence has completed in order to
* obtain reliable values of R and C.
*/
remove_revmap_chain(kvm, pte_index, rev, v,
be64_to_cpu(hpte[1]));
}
r = rev->guest_rpte & ~HPTE_GR_RESERVED;
note_hpte_modification(kvm, rev);
......@@ -655,6 +691,105 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
return H_SUCCESS;
}
long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index)
{
struct kvm *kvm = vcpu->kvm;
__be64 *hpte;
unsigned long v, r, gr;
struct revmap_entry *rev;
unsigned long *rmap;
long ret = H_NOT_FOUND;
if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER;
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
cpu_relax();
v = be64_to_cpu(hpte[0]);
r = be64_to_cpu(hpte[1]);
if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
goto out;
gr = rev->guest_rpte;
if (rev->guest_rpte & HPTE_R_R) {
rev->guest_rpte &= ~HPTE_R_R;
note_hpte_modification(kvm, rev);
}
if (v & HPTE_V_VALID) {
gr |= r & (HPTE_R_R | HPTE_R_C);
if (r & HPTE_R_R) {
kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
rmap = revmap_for_hpte(kvm, v, gr);
if (rmap) {
lock_rmap(rmap);
*rmap |= KVMPPC_RMAP_REFERENCED;
unlock_rmap(rmap);
}
}
}
vcpu->arch.gpr[4] = gr;
ret = H_SUCCESS;
out:
unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
return ret;
}
long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index)
{
struct kvm *kvm = vcpu->kvm;
__be64 *hpte;
unsigned long v, r, gr;
struct revmap_entry *rev;
unsigned long *rmap;
long ret = H_NOT_FOUND;
if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER;
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
cpu_relax();
v = be64_to_cpu(hpte[0]);
r = be64_to_cpu(hpte[1]);
if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
goto out;
gr = rev->guest_rpte;
if (gr & HPTE_R_C) {
rev->guest_rpte &= ~HPTE_R_C;
note_hpte_modification(kvm, rev);
}
if (v & HPTE_V_VALID) {
/* need to make it temporarily absent so C is stable */
hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
kvmppc_invalidate_hpte(kvm, hpte, pte_index);
r = be64_to_cpu(hpte[1]);
gr |= r & (HPTE_R_R | HPTE_R_C);
if (r & HPTE_R_C) {
unsigned long psize = hpte_page_size(v, r);
hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
eieio();
rmap = revmap_for_hpte(kvm, v, gr);
if (rmap) {
lock_rmap(rmap);
*rmap |= KVMPPC_RMAP_CHANGED;
kvmppc_update_rmap_change(rmap, psize);
unlock_rmap(rmap);
}
}
}
vcpu->arch.gpr[4] = gr;
ret = H_SUCCESS;
out:
unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
return ret;
}
void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
unsigned long pte_index)
{
......
......@@ -67,14 +67,12 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
}
/* Check if the core is loaded, if not, too hard */
cpu = vcpu->cpu;
cpu = vcpu->arch.thread_cpu;
if (cpu < 0 || cpu >= nr_cpu_ids) {
this_icp->rm_action |= XICS_RM_KICK_VCPU;
this_icp->rm_kick_target = vcpu;
return;
}
/* In SMT cpu will always point to thread 0, we adjust it */
cpu += vcpu->arch.ptid;
smp_mb();
kvmhv_rm_send_ipi(cpu);
......
......@@ -128,6 +128,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
subf r4, r4, r3
mtspr SPRN_DEC, r4
/* hwthread_req may have got set by cede or no vcpu, so clear it */
li r0, 0
stb r0, HSTATE_HWTHREAD_REQ(r13)
/*
* For external and machine check interrupts, we need
* to call the Linux handler to process the interrupt.
......@@ -215,7 +219,6 @@ kvm_novcpu_wakeup:
ld r5, HSTATE_KVM_VCORE(r13)
li r0, 0
stb r0, HSTATE_NAPPING(r13)
stb r0, HSTATE_HWTHREAD_REQ(r13)
/* check the wake reason */
bl kvmppc_check_wake_reason
......@@ -315,10 +318,10 @@ kvm_start_guest:
cmpdi r3, 0
bge kvm_no_guest
/* get vcpu pointer, NULL if we have no vcpu to run */
ld r4,HSTATE_KVM_VCPU(r13)
cmpdi r4,0
/* if we have no vcpu to run, go back to sleep */
/* get vcore pointer, NULL if we have nothing to run */
ld r5,HSTATE_KVM_VCORE(r13)
cmpdi r5,0
/* if we have no vcore to run, go back to sleep */
beq kvm_no_guest
kvm_secondary_got_guest:
......@@ -327,21 +330,42 @@ kvm_secondary_got_guest:
ld r6, PACA_DSCR_DEFAULT(r13)
std r6, HSTATE_DSCR(r13)
/* Order load of vcore, ptid etc. after load of vcpu */
/* On thread 0 of a subcore, set HDEC to max */
lbz r4, HSTATE_PTID(r13)
cmpwi r4, 0
bne 63f
lis r6, 0x7fff
ori r6, r6, 0xffff
mtspr SPRN_HDEC, r6
/* and set per-LPAR registers, if doing dynamic micro-threading */
ld r6, HSTATE_SPLIT_MODE(r13)
cmpdi r6, 0
beq 63f
ld r0, KVM_SPLIT_RPR(r6)
mtspr SPRN_RPR, r0
ld r0, KVM_SPLIT_PMMAR(r6)
mtspr SPRN_PMMAR, r0
ld r0, KVM_SPLIT_LDBAR(r6)
mtspr SPRN_LDBAR, r0
isync
63:
/* Order load of vcpu after load of vcore */
lwsync
ld r4, HSTATE_KVM_VCPU(r13)
bl kvmppc_hv_entry
/* Back from the guest, go back to nap */
/* Clear our vcpu pointer so we don't come back in early */
/* Clear our vcpu and vcore pointers so we don't come back in early */
li r0, 0
std r0, HSTATE_KVM_VCPU(r13)
/*
* Once we clear HSTATE_KVM_VCPU(r13), the code in
* Once we clear HSTATE_KVM_VCORE(r13), the code in
* kvmppc_run_core() is going to assume that all our vcpu
* state is visible in memory. This lwsync makes sure
* that that is true.
*/
lwsync
std r0, HSTATE_KVM_VCPU(r13)
std r0, HSTATE_KVM_VCORE(r13)
/*
* At this point we have finished executing in the guest.
......@@ -374,16 +398,71 @@ kvm_no_guest:
b power7_wakeup_loss
53: HMT_LOW
ld r4, HSTATE_KVM_VCPU(r13)
cmpdi r4, 0
ld r5, HSTATE_KVM_VCORE(r13)
cmpdi r5, 0
bne 60f
ld r3, HSTATE_SPLIT_MODE(r13)
cmpdi r3, 0
beq kvm_no_guest
lbz r0, KVM_SPLIT_DO_NAP(r3)
cmpwi r0, 0
beq kvm_no_guest
HMT_MEDIUM
b kvm_unsplit_nap
60: HMT_MEDIUM
b kvm_secondary_got_guest
54: li r0, KVM_HWTHREAD_IN_KVM
stb r0, HSTATE_HWTHREAD_STATE(r13)
b kvm_no_guest
/*
* Here the primary thread is trying to return the core to
* whole-core mode, so we need to nap.
*/
kvm_unsplit_nap:
/*
* Ensure that secondary doesn't nap when it has
* its vcore pointer set.
*/
sync /* matches smp_mb() before setting split_info.do_nap */
ld r0, HSTATE_KVM_VCORE(r13)
cmpdi r0, 0
bne kvm_no_guest
/* clear any pending message */
BEGIN_FTR_SECTION
lis r6, (PPC_DBELL_SERVER << (63-36))@h
PPC_MSGCLR(6)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* Set kvm_split_mode.napped[tid] = 1 */
ld r3, HSTATE_SPLIT_MODE(r13)
li r0, 1
lhz r4, PACAPACAINDEX(r13)
clrldi r4, r4, 61 /* micro-threading => P8 => 8 threads/core */
addi r4, r4, KVM_SPLIT_NAPPED
stbx r0, r3, r4
/* Check the do_nap flag again after setting napped[] */
sync
lbz r0, KVM_SPLIT_DO_NAP(r3)
cmpwi r0, 0
beq 57f
li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
mfspr r4, SPRN_LPCR
rlwimi r4, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
mtspr SPRN_LPCR, r4
isync
std r0, HSTATE_SCRATCH0(r13)
ptesync
ld r0, HSTATE_SCRATCH0(r13)
1: cmpd r0, r0
bne 1b
nap
b .
57: li r0, 0
stbx r0, r3, r4
b kvm_no_guest
/******************************************************************************
* *
* Entry code *
......@@ -854,7 +933,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
cmpwi r0, 0
bne 21f
HMT_LOW
20: lbz r0, VCORE_IN_GUEST(r5)
20: lwz r3, VCORE_ENTRY_EXIT(r5)
cmpwi r3, 0x100
bge no_switch_exit
lbz r0, VCORE_IN_GUEST(r5)
cmpwi r0, 0
beq 20b
HMT_MEDIUM
......@@ -870,7 +952,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
blt hdec_soon
ld r6, VCPU_CTR(r4)
lwz r7, VCPU_XER(r4)
ld r7, VCPU_XER(r4)
mtctr r6
mtxer r7
......@@ -985,9 +1067,13 @@ secondary_too_late:
#endif
11: b kvmhv_switch_to_host
no_switch_exit:
HMT_MEDIUM
li r12, 0
b 12f
hdec_soon:
li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
stw r12, VCPU_TRAP(r4)
12: stw r12, VCPU_TRAP(r4)
mr r9, r4
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
addi r3, r4, VCPU_TB_RMEXIT
......@@ -1103,7 +1189,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mfctr r3
mfxer r4
std r3, VCPU_CTR(r9)
stw r4, VCPU_XER(r9)
std r4, VCPU_XER(r9)
/* If this is a page table miss then see if it's theirs or ours */
cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
......@@ -1127,6 +1213,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
bne 3f
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
beq 4f
b guest_exit_cont
3:
......@@ -1176,6 +1263,11 @@ mc_cont:
ld r9, HSTATE_KVM_VCPU(r13)
lwz r12, VCPU_TRAP(r9)
/* Stop others sending VCPU interrupts to this physical CPU */
li r0, -1
stw r0, VCPU_CPU(r9)
stw r0, VCPU_THREAD_CPU(r9)
/* Save guest CTRL register, set runlatch to 1 */
mfspr r6,SPRN_CTRLF
stw r6,VCPU_CTRL(r9)
......@@ -1540,12 +1632,17 @@ kvmhv_switch_to_host:
/* Primary thread waits for all the secondaries to exit guest */
15: lwz r3,VCORE_ENTRY_EXIT(r5)
srwi r0,r3,8
rlwinm r0,r3,32-8,0xff
clrldi r3,r3,56
cmpw r3,r0
bne 15b
isync
/* Did we actually switch to the guest at all? */
lbz r6, VCORE_IN_GUEST(r5)
cmpwi r6, 0
beq 19f
/* Primary thread switches back to host partition */
ld r6,KVM_HOST_SDR1(r4)
lwz r7,KVM_HOST_LPID(r4)
......@@ -1589,7 +1686,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
18:
/* Signal secondary CPUs to continue */
stb r0,VCORE_IN_GUEST(r5)
lis r8,0x7fff /* MAX_INT@h */
19: lis r8,0x7fff /* MAX_INT@h */
mtspr SPRN_HDEC,r8
16: ld r8,KVM_HOST_LPCR(r4)
......@@ -1675,7 +1772,7 @@ kvmppc_hdsi:
bl kvmppc_msr_interrupt
fast_interrupt_c_return:
6: ld r7, VCPU_CTR(r9)
lwz r8, VCPU_XER(r9)
ld r8, VCPU_XER(r9)
mtctr r7
mtxer r8
mr r4, r9
......@@ -1816,8 +1913,8 @@ hcall_real_table:
.long DOTSYM(kvmppc_h_remove) - hcall_real_table
.long DOTSYM(kvmppc_h_enter) - hcall_real_table
.long DOTSYM(kvmppc_h_read) - hcall_real_table
.long 0 /* 0x10 - H_CLEAR_MOD */
.long 0 /* 0x14 - H_CLEAR_REF */
.long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table
.long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table
.long DOTSYM(kvmppc_h_protect) - hcall_real_table
.long DOTSYM(kvmppc_h_get_tce) - hcall_real_table
.long DOTSYM(kvmppc_h_put_tce) - hcall_real_table
......
......@@ -352,7 +352,7 @@ static inline u32 inst_get_field(u32 inst, int msb, int lsb)
return kvmppc_get_field(inst, msb + 32, lsb + 32);
}
bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
static bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
{
if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
return false;
......
......@@ -123,7 +123,7 @@ no_dcbz32_on:
PPC_LL r8, SVCPU_CTR(r3)
PPC_LL r9, SVCPU_LR(r3)
lwz r10, SVCPU_CR(r3)
lwz r11, SVCPU_XER(r3)
PPC_LL r11, SVCPU_XER(r3)
mtctr r8
mtlr r9
......@@ -237,7 +237,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
mfctr r8
mflr r9
stw r5, SVCPU_XER(r13)
PPC_STL r5, SVCPU_XER(r13)
PPC_STL r6, SVCPU_FAULT_DAR(r13)
stw r7, SVCPU_FAULT_DSISR(r13)
PPC_STL r8, SVCPU_CTR(r13)
......
......@@ -41,7 +41,7 @@
* =======
*
* Each ICS has a spin lock protecting the information about the IRQ
* sources and avoiding simultaneous deliveries if the same interrupt.
* sources and avoiding simultaneous deliveries of the same interrupt.
*
* ICP operations are done via a single compare & swap transaction
* (most ICP state fits in the union kvmppc_icp_state)
......
......@@ -933,6 +933,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
#endif
break;
case BOOKE_INTERRUPT_CRITICAL:
kvmppc_fill_pt_regs(&regs);
unknown_exception(&regs);
break;
case BOOKE_INTERRUPT_DEBUG:
......
......@@ -377,7 +377,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea)
| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
vcpu->arch.shared->mas1 =
(vcpu->arch.shared->mas6 & MAS6_SPID0)
| (vcpu->arch.shared->mas6 & (MAS6_SAS ? MAS1_TS : 0))
| ((vcpu->arch.shared->mas6 & MAS6_SAS) ? MAS1_TS : 0)
| (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0));
vcpu->arch.shared->mas2 &= MAS2_EPN;
vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 &
......
......@@ -660,7 +660,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return kvmppc_core_pending_dec(vcpu);
}
enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu;
......
......@@ -650,6 +650,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
u16 sel;
la = seg_base(ctxt, addr.seg) + addr.ea;
*linear = la;
*max_size = 0;
switch (mode) {
case X86EMUL_MODE_PROT64:
......@@ -693,7 +694,6 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
}
if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
return emulate_gp(ctxt, 0);
*linear = la;
return X86EMUL_CONTINUE;
bad:
if (addr.seg == VCPU_SREG_SS)
......
......@@ -3309,13 +3309,14 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
walk_shadow_page_lockless_begin(vcpu);
for (shadow_walk_init(&iterator, vcpu, addr), root = iterator.level;
for (shadow_walk_init(&iterator, vcpu, addr),
leaf = root = iterator.level;
shadow_walk_okay(&iterator);
__shadow_walk_next(&iterator, spte)) {
leaf = iterator.level;
spte = mmu_spte_get_lockless(iterator.sptep);
sptes[leaf - 1] = spte;
leaf--;
if (!is_shadow_present_pte(spte))
break;
......@@ -3329,7 +3330,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
if (reserved) {
pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
__func__, addr);
while (root >= leaf) {
while (root > leaf) {
pr_err("------ spte 0x%llx level %d.\n",
sptes[root - 1], root);
root--;
......
......@@ -5943,6 +5943,7 @@ static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg));
}
#ifdef CONFIG_X86_64
static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
{
struct kvm_segment seg;
......@@ -5958,6 +5959,7 @@ static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
put_smstate(u32, buf, offset + 4, seg.limit);
put_smstate(u64, buf, offset + 8, seg.base);
}
#endif
static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
{
......
......@@ -52,13 +52,16 @@ struct arch_timer_cpu {
/* Timer IRQ */
const struct kvm_irq_level *irq;
/* VGIC mapping */
struct irq_phys_map *map;
};
int kvm_timer_hyp_init(void);
void kvm_timer_enable(struct kvm *kvm);
void kvm_timer_init(struct kvm *kvm);
void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
const struct kvm_irq_level *irq);
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
const struct kvm_irq_level *irq);
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
......
......@@ -95,11 +95,15 @@ enum vgic_type {
#define LR_STATE_ACTIVE (1 << 1)
#define LR_STATE_MASK (3 << 0)
#define LR_EOI_INT (1 << 2)
#define LR_HW (1 << 3)
struct vgic_lr {
u16 irq;
u8 source;
u8 state;
unsigned irq:10;
union {
unsigned hwirq:10;
unsigned source:3;
};
unsigned state:4;
};
struct vgic_vmcr {
......@@ -155,6 +159,19 @@ struct vgic_io_device {
struct kvm_io_device dev;
};
struct irq_phys_map {
u32 virt_irq;
u32 phys_irq;
u32 irq;
bool active;
};
struct irq_phys_map_entry {
struct list_head entry;
struct rcu_head rcu;
struct irq_phys_map map;
};
struct vgic_dist {
spinlock_t lock;
bool in_kernel;
......@@ -252,6 +269,10 @@ struct vgic_dist {
struct vgic_vm_ops vm_ops;
struct vgic_io_device dist_iodev;
struct vgic_io_device *redist_iodevs;
/* Virtual irq to hwirq mapping */
spinlock_t irq_phys_map_lock;
struct list_head irq_phys_map_list;
};
struct vgic_v2_cpu_if {
......@@ -303,6 +324,9 @@ struct vgic_cpu {
struct vgic_v2_cpu_if vgic_v2;
struct vgic_v3_cpu_if vgic_v3;
};
/* Protected by the distributor's irq_phys_map_lock */
struct list_head irq_phys_map_list;
};
#define LR_EMPTY 0xff
......@@ -317,16 +341,25 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
int kvm_vgic_hyp_init(void);
int kvm_vgic_map_resources(struct kvm *kvm);
int kvm_vgic_get_max_vcpus(void);
void kvm_vgic_early_init(struct kvm *kvm);
int kvm_vgic_create(struct kvm *kvm, u32 type);
void kvm_vgic_destroy(struct kvm *kvm);
void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
bool level);
int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
struct irq_phys_map *map, bool level);
void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
int virt_irq, int irq);
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map);
void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active);
#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))
......
......@@ -270,9 +270,12 @@
#define ICH_LR_EOI (1UL << 41)
#define ICH_LR_GROUP (1UL << 60)
#define ICH_LR_HW (1UL << 61)
#define ICH_LR_STATE (3UL << 62)
#define ICH_LR_PENDING_BIT (1UL << 62)
#define ICH_LR_ACTIVE_BIT (1UL << 63)
#define ICH_LR_PHYS_ID_SHIFT 32
#define ICH_LR_PHYS_ID_MASK (0x3ffUL << ICH_LR_PHYS_ID_SHIFT)
#define ICH_MISR_EOI (1 << 0)
#define ICH_MISR_U (1 << 1)
......
......@@ -75,11 +75,12 @@
#define GICH_LR_VIRTUALID (0x3ff << 0)
#define GICH_LR_PHYSID_CPUID_SHIFT (10)
#define GICH_LR_PHYSID_CPUID (7 << GICH_LR_PHYSID_CPUID_SHIFT)
#define GICH_LR_PHYSID_CPUID (0x3ff << GICH_LR_PHYSID_CPUID_SHIFT)
#define GICH_LR_STATE (3 << 28)
#define GICH_LR_PENDING_BIT (1 << 28)
#define GICH_LR_ACTIVE_BIT (1 << 29)
#define GICH_LR_EOI (1 << 19)
#define GICH_LR_HW (1 << 31)
#define GICH_VMCR_CTRL_SHIFT 0
#define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT)
......
......@@ -242,6 +242,7 @@ struct kvm_vcpu {
int sigset_active;
sigset_t sigset;
struct kvm_vcpu_stat stat;
unsigned int halt_poll_ns;
#ifdef CONFIG_HAS_IOMEM
int mmio_needed;
......
......@@ -358,6 +358,36 @@ TRACE_EVENT(
#endif
TRACE_EVENT(kvm_halt_poll_ns,
TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
TP_ARGS(grow, vcpu_id, new, old),
TP_STRUCT__entry(
__field(bool, grow)
__field(unsigned int, vcpu_id)
__field(int, new)
__field(int, old)
),
TP_fast_assign(
__entry->grow = grow;
__entry->vcpu_id = vcpu_id;
__entry->new = new;
__entry->old = old;
),
TP_printk("vcpu %u: halt_poll_ns %d (%s %d)",
__entry->vcpu_id,
__entry->new,
__entry->grow ? "grow" : "shrink",
__entry->old)
);
#define trace_kvm_halt_poll_ns_grow(vcpu_id, new, old) \
trace_kvm_halt_poll_ns(true, vcpu_id, new, old)
#define trace_kvm_halt_poll_ns_shrink(vcpu_id, new, old) \
trace_kvm_halt_poll_ns(false, vcpu_id, new, old)
#endif /* _TRACE_KVM_MAIN_H */
/* This part must be outside protection */
......
......@@ -237,6 +237,7 @@ struct kvm_run {
__u32 count;
__u64 data_offset; /* relative to kvm_run start */
} io;
/* KVM_EXIT_DEBUG */
struct {
struct kvm_debug_exit_arch arch;
} debug;
......@@ -285,6 +286,7 @@ struct kvm_run {
__u32 data;
__u8 is_write;
} dcr;
/* KVM_EXIT_INTERNAL_ERROR */
struct {
__u32 suberror;
/* Available with KVM_CAP_INTERNAL_ERROR_DATA: */
......@@ -295,6 +297,7 @@ struct kvm_run {
struct {
__u64 gprs[32];
} osi;
/* KVM_EXIT_PAPR_HCALL */
struct {
__u64 nr;
__u64 ret;
......@@ -819,6 +822,8 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_DISABLE_QUIRKS 116
#define KVM_CAP_X86_SMM 117
#define KVM_CAP_MULTI_ADDRESS_SPACE 118
#define KVM_CAP_GUEST_DEBUG_HW_BPS 119
#define KVM_CAP_GUEST_DEBUG_HW_WPS 120
#ifdef KVM_CAP_IRQ_ROUTING
......
......@@ -64,10 +64,10 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
int ret;
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
timer->irq->irq,
timer->irq->level);
kvm_vgic_set_phys_irq_active(timer->map, true);
ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
timer->map,
timer->irq->level);
WARN_ON(ret);
}
......@@ -117,7 +117,8 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
cycle_t cval, now;
if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
!(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
!(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) ||
kvm_vgic_get_phys_irq_active(timer->map))
return false;
cval = timer->cntv_cval;
......@@ -184,10 +185,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
timer_arm(timer, ns);
}
void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
const struct kvm_irq_level *irq)
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
const struct kvm_irq_level *irq)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
struct irq_phys_map *map;
/*
* The vcpu timer irq number cannot be determined in
......@@ -196,6 +198,17 @@ void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
* vcpu timer irq number when the vcpu is reset.
*/
timer->irq = irq;
/*
* Tell the VGIC that the virtual interrupt is tied to a
* physical interrupt. We do that once per VCPU.
*/
map = kvm_vgic_map_phys_irq(vcpu, irq->irq, host_vtimer_irq);
if (WARN_ON(IS_ERR(map)))
return PTR_ERR(map);
timer->map = map;
return 0;
}
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
......@@ -335,6 +348,8 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
timer_disarm(timer);
if (timer->map)
kvm_vgic_unmap_phys_irq(vcpu, timer->map);
}
void kvm_timer_enable(struct kvm *kvm)
......
......@@ -48,6 +48,10 @@ static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
lr_desc.state |= LR_STATE_ACTIVE;
if (val & GICH_LR_EOI)
lr_desc.state |= LR_EOI_INT;
if (val & GICH_LR_HW) {
lr_desc.state |= LR_HW;
lr_desc.hwirq = (val & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT;
}
return lr_desc;
}
......@@ -55,7 +59,9 @@ static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
struct vgic_lr lr_desc)
{
u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq;
u32 lr_val;
lr_val = lr_desc.irq;
if (lr_desc.state & LR_STATE_PENDING)
lr_val |= GICH_LR_PENDING_BIT;
......@@ -64,6 +70,14 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
if (lr_desc.state & LR_EOI_INT)
lr_val |= GICH_LR_EOI;
if (lr_desc.state & LR_HW) {
lr_val |= GICH_LR_HW;
lr_val |= (u32)lr_desc.hwirq << GICH_LR_PHYSID_CPUID_SHIFT;
}
if (lr_desc.irq < VGIC_NR_SGIS)
lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT);
vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
}
......
......@@ -67,6 +67,10 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
lr_desc.state |= LR_STATE_ACTIVE;
if (val & ICH_LR_EOI)
lr_desc.state |= LR_EOI_INT;
if (val & ICH_LR_HW) {
lr_desc.state |= LR_HW;
lr_desc.hwirq = (val >> ICH_LR_PHYS_ID_SHIFT) & GENMASK(9, 0);
}
return lr_desc;
}
......@@ -84,10 +88,17 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
* Eventually we want to make this configurable, so we may revisit
* this in the future.
*/
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
switch (vcpu->kvm->arch.vgic.vgic_model) {
case KVM_DEV_TYPE_ARM_VGIC_V3:
lr_val |= ICH_LR_GROUP;
else
lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT;
break;
case KVM_DEV_TYPE_ARM_VGIC_V2:
if (lr_desc.irq < VGIC_NR_SGIS)
lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT;
break;
default:
BUG();
}
if (lr_desc.state & LR_STATE_PENDING)
lr_val |= ICH_LR_PENDING_BIT;
......@@ -95,6 +106,10 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
lr_val |= ICH_LR_ACTIVE_BIT;
if (lr_desc.state & LR_EOI_INT)
lr_val |= ICH_LR_EOI;
if (lr_desc.state & LR_HW) {
lr_val |= ICH_LR_HW;
lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT;
}
vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
}
......
This diff is collapsed.
......@@ -213,11 +213,15 @@ int kvm_set_irq_routing(struct kvm *kvm,
goto out;
r = -EINVAL;
if (ue->flags)
if (ue->flags) {
kfree(e);
goto out;
}
r = setup_routing_entry(new, e, ue);
if (r)
if (r) {
kfree(e);
goto out;
}
++ue;
}
......
......@@ -66,9 +66,18 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
static unsigned int halt_poll_ns;
/* halt polling only reduces halt latency by 5-7 us, 500us is enough */
static unsigned int halt_poll_ns = 500000;
module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
/* Default doubles per-vcpu halt_poll_ns. */
static unsigned int halt_poll_ns_grow = 2;
module_param(halt_poll_ns_grow, int, S_IRUGO);
/* Default resets per-vcpu halt_poll_ns . */
static unsigned int halt_poll_ns_shrink;
module_param(halt_poll_ns_shrink, int, S_IRUGO);
/*
* Ordering of locks:
*
......@@ -217,6 +226,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
vcpu->kvm = kvm;
vcpu->vcpu_id = id;
vcpu->pid = NULL;
vcpu->halt_poll_ns = 0;
init_waitqueue_head(&vcpu->wq);
kvm_async_pf_vcpu_init(vcpu);
......@@ -1906,6 +1916,35 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
{
int old, val;
old = val = vcpu->halt_poll_ns;
/* 10us base */
if (val == 0 && halt_poll_ns_grow)
val = 10000;
else
val *= halt_poll_ns_grow;
vcpu->halt_poll_ns = val;
trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
}
static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
{
int old, val;
old = val = vcpu->halt_poll_ns;
if (halt_poll_ns_shrink == 0)
val = 0;
else
val /= halt_poll_ns_shrink;
vcpu->halt_poll_ns = val;
trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);
}
static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
{
if (kvm_arch_vcpu_runnable(vcpu)) {
......@@ -1928,10 +1967,11 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
ktime_t start, cur;
DEFINE_WAIT(wait);
bool waited = false;
u64 block_ns;
start = cur = ktime_get();
if (halt_poll_ns) {
ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);
if (vcpu->halt_poll_ns) {
ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
do {
/*
......@@ -1960,7 +2000,21 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
cur = ktime_get();
out:
trace_kvm_vcpu_wakeup(ktime_to_ns(cur) - ktime_to_ns(start), waited);
block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
if (halt_poll_ns) {
if (block_ns <= vcpu->halt_poll_ns)
;
/* we had a long block, shrink polling */
else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
shrink_halt_poll_ns(vcpu);
/* we had a short halt and our poll time is too small */
else if (vcpu->halt_poll_ns < halt_poll_ns &&
block_ns < halt_poll_ns)
grow_halt_poll_ns(vcpu);
}
trace_kvm_vcpu_wakeup(block_ns, waited);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_block);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment