Commit 493804a6 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more kvm updates from Paolo Bonzini:
 "This includes the 6.4 changes for RISC-V, and a few bugfix patches for
  other architectures. For x86, this closes a longstanding performance
  issue in the newer and (usually) more scalable page table management
  code.

  RISC-V:
   - ONE_REG interface to enable/disable SBI extensions
   - Zbb extension for Guest/VM
   - AIA CSR virtualization

  x86:
   - Fix a long-standing TDP MMU flaw, where unloading roots on a vCPU
     can result in the root being freed even though the root is
     completely valid and can be reused as-is (with a TLB flush).

  s390:
   - A couple of bugfixes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: s390: fix race in gmap_make_secure()
  KVM: s390: pv: fix asynchronous teardown for small VMs
  KVM: x86: Preserve TDP MMU roots until they are explicitly invalidated
  RISC-V: KVM: Virtualize per-HART AIA CSRs
  RISC-V: KVM: Use bitmap for irqs_pending and irqs_pending_mask
  RISC-V: KVM: Add ONE_REG interface for AIA CSRs
  RISC-V: KVM: Implement subtype for CSR ONE_REG interface
  RISC-V: KVM: Initial skeletal support for AIA
  RISC-V: KVM: Drop the _MASK suffix from hgatp.VMID mask defines
  RISC-V: Detect AIA CSRs from ISA string
  RISC-V: Add AIA related CSR defines
  RISC-V: KVM: Allow Zbb extension for Guest/VM
  RISC-V: KVM: Add ONE_REG interface to enable/disable SBI extensions
  RISC-V: KVM: Alphabetize selects
  KVM: RISC-V: Retry fault if vma_lookup() results become invalid
parents 7163a211 7a8016d9
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#define _ASM_RISCV_CSR_H #define _ASM_RISCV_CSR_H
#include <asm/asm.h> #include <asm/asm.h>
#include <linux/const.h> #include <linux/bits.h>
/* Status register flags */ /* Status register flags */
#define SR_SIE _AC(0x00000002, UL) /* Supervisor Interrupt Enable */ #define SR_SIE _AC(0x00000002, UL) /* Supervisor Interrupt Enable */
...@@ -72,7 +72,10 @@ ...@@ -72,7 +72,10 @@
#define IRQ_S_EXT 9 #define IRQ_S_EXT 9
#define IRQ_VS_EXT 10 #define IRQ_VS_EXT 10
#define IRQ_M_EXT 11 #define IRQ_M_EXT 11
#define IRQ_S_GEXT 12
#define IRQ_PMU_OVF 13 #define IRQ_PMU_OVF 13
#define IRQ_LOCAL_MAX (IRQ_PMU_OVF + 1)
#define IRQ_LOCAL_MASK GENMASK((IRQ_LOCAL_MAX - 1), 0)
/* Exception causes */ /* Exception causes */
#define EXC_INST_MISALIGNED 0 #define EXC_INST_MISALIGNED 0
...@@ -127,25 +130,25 @@ ...@@ -127,25 +130,25 @@
#define HGATP32_MODE_SHIFT 31 #define HGATP32_MODE_SHIFT 31
#define HGATP32_VMID_SHIFT 22 #define HGATP32_VMID_SHIFT 22
#define HGATP32_VMID_MASK _AC(0x1FC00000, UL) #define HGATP32_VMID GENMASK(28, 22)
#define HGATP32_PPN _AC(0x003FFFFF, UL) #define HGATP32_PPN GENMASK(21, 0)
#define HGATP64_MODE_SHIFT 60 #define HGATP64_MODE_SHIFT 60
#define HGATP64_VMID_SHIFT 44 #define HGATP64_VMID_SHIFT 44
#define HGATP64_VMID_MASK _AC(0x03FFF00000000000, UL) #define HGATP64_VMID GENMASK(57, 44)
#define HGATP64_PPN _AC(0x00000FFFFFFFFFFF, UL) #define HGATP64_PPN GENMASK(43, 0)
#define HGATP_PAGE_SHIFT 12 #define HGATP_PAGE_SHIFT 12
#ifdef CONFIG_64BIT #ifdef CONFIG_64BIT
#define HGATP_PPN HGATP64_PPN #define HGATP_PPN HGATP64_PPN
#define HGATP_VMID_SHIFT HGATP64_VMID_SHIFT #define HGATP_VMID_SHIFT HGATP64_VMID_SHIFT
#define HGATP_VMID_MASK HGATP64_VMID_MASK #define HGATP_VMID HGATP64_VMID
#define HGATP_MODE_SHIFT HGATP64_MODE_SHIFT #define HGATP_MODE_SHIFT HGATP64_MODE_SHIFT
#else #else
#define HGATP_PPN HGATP32_PPN #define HGATP_PPN HGATP32_PPN
#define HGATP_VMID_SHIFT HGATP32_VMID_SHIFT #define HGATP_VMID_SHIFT HGATP32_VMID_SHIFT
#define HGATP_VMID_MASK HGATP32_VMID_MASK #define HGATP_VMID HGATP32_VMID
#define HGATP_MODE_SHIFT HGATP32_MODE_SHIFT #define HGATP_MODE_SHIFT HGATP32_MODE_SHIFT
#endif #endif
...@@ -155,6 +158,27 @@ ...@@ -155,6 +158,27 @@
(_AC(1, UL) << IRQ_S_TIMER) | \ (_AC(1, UL) << IRQ_S_TIMER) | \
(_AC(1, UL) << IRQ_S_EXT)) (_AC(1, UL) << IRQ_S_EXT))
/* AIA CSR bits */
#define TOPI_IID_SHIFT 16
#define TOPI_IID_MASK GENMASK(11, 0)
#define TOPI_IPRIO_MASK GENMASK(7, 0)
#define TOPI_IPRIO_BITS 8
#define TOPEI_ID_SHIFT 16
#define TOPEI_ID_MASK GENMASK(10, 0)
#define TOPEI_PRIO_MASK GENMASK(10, 0)
#define ISELECT_IPRIO0 0x30
#define ISELECT_IPRIO15 0x3f
#define ISELECT_MASK GENMASK(8, 0)
#define HVICTL_VTI BIT(30)
#define HVICTL_IID GENMASK(27, 16)
#define HVICTL_IID_SHIFT 16
#define HVICTL_DPR BIT(9)
#define HVICTL_IPRIOM BIT(8)
#define HVICTL_IPRIO GENMASK(7, 0)
/* xENVCFG flags */ /* xENVCFG flags */
#define ENVCFG_STCE (_AC(1, ULL) << 63) #define ENVCFG_STCE (_AC(1, ULL) << 63)
#define ENVCFG_PBMTE (_AC(1, ULL) << 62) #define ENVCFG_PBMTE (_AC(1, ULL) << 62)
...@@ -249,6 +273,18 @@ ...@@ -249,6 +273,18 @@
#define CSR_STIMECMP 0x14D #define CSR_STIMECMP 0x14D
#define CSR_STIMECMPH 0x15D #define CSR_STIMECMPH 0x15D
/* Supervisor-Level Window to Indirectly Accessed Registers (AIA) */
#define CSR_SISELECT 0x150
#define CSR_SIREG 0x151
/* Supervisor-Level Interrupts (AIA) */
#define CSR_STOPEI 0x15c
#define CSR_STOPI 0xdb0
/* Supervisor-Level High-Half CSRs (AIA) */
#define CSR_SIEH 0x114
#define CSR_SIPH 0x154
#define CSR_VSSTATUS 0x200 #define CSR_VSSTATUS 0x200
#define CSR_VSIE 0x204 #define CSR_VSIE 0x204
#define CSR_VSTVEC 0x205 #define CSR_VSTVEC 0x205
...@@ -278,8 +314,32 @@ ...@@ -278,8 +314,32 @@
#define CSR_HGATP 0x680 #define CSR_HGATP 0x680
#define CSR_HGEIP 0xe12 #define CSR_HGEIP 0xe12
/* Virtual Interrupts and Interrupt Priorities (H-extension with AIA) */
#define CSR_HVIEN 0x608
#define CSR_HVICTL 0x609
#define CSR_HVIPRIO1 0x646
#define CSR_HVIPRIO2 0x647
/* VS-Level Window to Indirectly Accessed Registers (H-extension with AIA) */
#define CSR_VSISELECT 0x250
#define CSR_VSIREG 0x251
/* VS-Level Interrupts (H-extension with AIA) */
#define CSR_VSTOPEI 0x25c
#define CSR_VSTOPI 0xeb0
/* Hypervisor and VS-Level High-Half CSRs (H-extension with AIA) */
#define CSR_HIDELEGH 0x613
#define CSR_HVIENH 0x618
#define CSR_HVIPH 0x655
#define CSR_HVIPRIO1H 0x656
#define CSR_HVIPRIO2H 0x657
#define CSR_VSIEH 0x214
#define CSR_VSIPH 0x254
#define CSR_MSTATUS 0x300 #define CSR_MSTATUS 0x300
#define CSR_MISA 0x301 #define CSR_MISA 0x301
#define CSR_MIDELEG 0x303
#define CSR_MIE 0x304 #define CSR_MIE 0x304
#define CSR_MTVEC 0x305 #define CSR_MTVEC 0x305
#define CSR_MENVCFG 0x30a #define CSR_MENVCFG 0x30a
...@@ -296,6 +356,25 @@ ...@@ -296,6 +356,25 @@
#define CSR_MIMPID 0xf13 #define CSR_MIMPID 0xf13
#define CSR_MHARTID 0xf14 #define CSR_MHARTID 0xf14
/* Machine-Level Window to Indirectly Accessed Registers (AIA) */
#define CSR_MISELECT 0x350
#define CSR_MIREG 0x351
/* Machine-Level Interrupts (AIA) */
#define CSR_MTOPEI 0x35c
#define CSR_MTOPI 0xfb0
/* Virtual Interrupts for Supervisor Level (AIA) */
#define CSR_MVIEN 0x308
#define CSR_MVIP 0x309
/* Machine-Level High-Half CSRs (AIA) */
#define CSR_MIDELEGH 0x313
#define CSR_MIEH 0x314
#define CSR_MVIENH 0x318
#define CSR_MVIPH 0x319
#define CSR_MIPH 0x354
#ifdef CONFIG_RISCV_M_MODE #ifdef CONFIG_RISCV_M_MODE
# define CSR_STATUS CSR_MSTATUS # define CSR_STATUS CSR_MSTATUS
# define CSR_IE CSR_MIE # define CSR_IE CSR_MIE
...@@ -306,6 +385,13 @@ ...@@ -306,6 +385,13 @@
# define CSR_TVAL CSR_MTVAL # define CSR_TVAL CSR_MTVAL
# define CSR_IP CSR_MIP # define CSR_IP CSR_MIP
# define CSR_IEH CSR_MIEH
# define CSR_ISELECT CSR_MISELECT
# define CSR_IREG CSR_MIREG
# define CSR_IPH CSR_MIPH
# define CSR_TOPEI CSR_MTOPEI
# define CSR_TOPI CSR_MTOPI
# define SR_IE SR_MIE # define SR_IE SR_MIE
# define SR_PIE SR_MPIE # define SR_PIE SR_MPIE
# define SR_PP SR_MPP # define SR_PP SR_MPP
...@@ -323,6 +409,13 @@ ...@@ -323,6 +409,13 @@
# define CSR_TVAL CSR_STVAL # define CSR_TVAL CSR_STVAL
# define CSR_IP CSR_SIP # define CSR_IP CSR_SIP
# define CSR_IEH CSR_SIEH
# define CSR_ISELECT CSR_SISELECT
# define CSR_IREG CSR_SIREG
# define CSR_IPH CSR_SIPH
# define CSR_TOPEI CSR_STOPEI
# define CSR_TOPI CSR_STOPI
# define SR_IE SR_SIE # define SR_IE SR_SIE
# define SR_PIE SR_SPIE # define SR_PIE SR_SPIE
# define SR_PP SR_SPP # define SR_PP SR_SPP
......
...@@ -44,10 +44,18 @@ ...@@ -44,10 +44,18 @@
#define RISCV_ISA_EXT_ZIHINTPAUSE 32 #define RISCV_ISA_EXT_ZIHINTPAUSE 32
#define RISCV_ISA_EXT_SVNAPOT 33 #define RISCV_ISA_EXT_SVNAPOT 33
#define RISCV_ISA_EXT_ZICBOZ 34 #define RISCV_ISA_EXT_ZICBOZ 34
#define RISCV_ISA_EXT_SMAIA 35
#define RISCV_ISA_EXT_SSAIA 36
#define RISCV_ISA_EXT_MAX 64 #define RISCV_ISA_EXT_MAX 64
#define RISCV_ISA_EXT_NAME_LEN_MAX 32 #define RISCV_ISA_EXT_NAME_LEN_MAX 32
#ifdef CONFIG_RISCV_M_MODE
#define RISCV_ISA_EXT_SxAIA RISCV_ISA_EXT_SMAIA
#else
#define RISCV_ISA_EXT_SxAIA RISCV_ISA_EXT_SSAIA
#endif
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#include <linux/jump_label.h> #include <linux/jump_label.h>
......
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2021 Western Digital Corporation or its affiliates.
* Copyright (C) 2022 Ventana Micro Systems Inc.
*
* Authors:
* Anup Patel <apatel@ventanamicro.com>
*/
#ifndef __KVM_RISCV_AIA_H
#define __KVM_RISCV_AIA_H
#include <linux/jump_label.h>
#include <linux/kvm_types.h>
#include <asm/csr.h>
struct kvm_aia {
/* In-kernel irqchip created */
bool in_kernel;
/* In-kernel irqchip initialized */
bool initialized;
};
struct kvm_vcpu_aia_csr {
unsigned long vsiselect;
unsigned long hviprio1;
unsigned long hviprio2;
unsigned long vsieh;
unsigned long hviph;
unsigned long hviprio1h;
unsigned long hviprio2h;
};
struct kvm_vcpu_aia {
/* CPU AIA CSR context of Guest VCPU */
struct kvm_vcpu_aia_csr guest_csr;
/* CPU AIA CSR context upon Guest VCPU reset */
struct kvm_vcpu_aia_csr guest_reset_csr;
};
#define kvm_riscv_aia_initialized(k) ((k)->arch.aia.initialized)
#define irqchip_in_kernel(k) ((k)->arch.aia.in_kernel)
DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
#define kvm_riscv_aia_available() \
static_branch_unlikely(&kvm_riscv_aia_available)
#define KVM_RISCV_AIA_IMSIC_TOPEI (ISELECT_MASK + 1)
static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu,
unsigned long isel,
unsigned long *val,
unsigned long new_val,
unsigned long wr_mask)
{
return 0;
}
#ifdef CONFIG_32BIT
void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu);
#else
static inline void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu)
{
}
static inline void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu)
{
}
#endif
bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask);
void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu);
void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu);
int kvm_riscv_vcpu_aia_get_csr(struct kvm_vcpu *vcpu,
unsigned long reg_num,
unsigned long *out_val);
int kvm_riscv_vcpu_aia_set_csr(struct kvm_vcpu *vcpu,
unsigned long reg_num,
unsigned long val);
int kvm_riscv_vcpu_aia_rmw_topei(struct kvm_vcpu *vcpu,
unsigned int csr_num,
unsigned long *val,
unsigned long new_val,
unsigned long wr_mask);
int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num,
unsigned long *val, unsigned long new_val,
unsigned long wr_mask);
#define KVM_RISCV_VCPU_AIA_CSR_FUNCS \
{ .base = CSR_SIREG, .count = 1, .func = kvm_riscv_vcpu_aia_rmw_ireg }, \
{ .base = CSR_STOPEI, .count = 1, .func = kvm_riscv_vcpu_aia_rmw_topei },
static inline int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu)
{
return 1;
}
static inline void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu)
{
}
static inline int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu)
{
return 0;
}
static inline void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu)
{
}
static inline void kvm_riscv_aia_init_vm(struct kvm *kvm)
{
}
static inline void kvm_riscv_aia_destroy_vm(struct kvm *kvm)
{
}
void kvm_riscv_aia_enable(void);
void kvm_riscv_aia_disable(void);
int kvm_riscv_aia_init(void);
void kvm_riscv_aia_exit(void);
#endif
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/kvm_types.h> #include <linux/kvm_types.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <asm/hwcap.h> #include <asm/hwcap.h>
#include <asm/kvm_aia.h>
#include <asm/kvm_vcpu_fp.h> #include <asm/kvm_vcpu_fp.h>
#include <asm/kvm_vcpu_insn.h> #include <asm/kvm_vcpu_insn.h>
#include <asm/kvm_vcpu_sbi.h> #include <asm/kvm_vcpu_sbi.h>
...@@ -94,6 +95,9 @@ struct kvm_arch { ...@@ -94,6 +95,9 @@ struct kvm_arch {
/* Guest Timer */ /* Guest Timer */
struct kvm_guest_timer timer; struct kvm_guest_timer timer;
/* AIA Guest/VM context */
struct kvm_aia aia;
}; };
struct kvm_cpu_trap { struct kvm_cpu_trap {
...@@ -200,8 +204,9 @@ struct kvm_vcpu_arch { ...@@ -200,8 +204,9 @@ struct kvm_vcpu_arch {
* in irqs_pending. Our approach is modeled around multiple producer * in irqs_pending. Our approach is modeled around multiple producer
* and single consumer problem where the consumer is the VCPU itself. * and single consumer problem where the consumer is the VCPU itself.
*/ */
unsigned long irqs_pending; #define KVM_RISCV_VCPU_NR_IRQS 64
unsigned long irqs_pending_mask; DECLARE_BITMAP(irqs_pending, KVM_RISCV_VCPU_NR_IRQS);
DECLARE_BITMAP(irqs_pending_mask, KVM_RISCV_VCPU_NR_IRQS);
/* VCPU Timer */ /* VCPU Timer */
struct kvm_vcpu_timer timer; struct kvm_vcpu_timer timer;
...@@ -221,6 +226,9 @@ struct kvm_vcpu_arch { ...@@ -221,6 +226,9 @@ struct kvm_vcpu_arch {
/* SBI context */ /* SBI context */
struct kvm_vcpu_sbi_context sbi_context; struct kvm_vcpu_sbi_context sbi_context;
/* AIA VCPU context */
struct kvm_vcpu_aia aia_context;
/* Cache pages needed to program page tables with spinlock held */ /* Cache pages needed to program page tables with spinlock held */
struct kvm_mmu_memory_cache mmu_page_cache; struct kvm_mmu_memory_cache mmu_page_cache;
...@@ -327,7 +335,7 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq); ...@@ -327,7 +335,7 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq); int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu);
bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask); bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask);
void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
struct kvm_vcpu_sbi_context { struct kvm_vcpu_sbi_context {
int return_handled; int return_handled;
bool extension_disabled[KVM_RISCV_SBI_EXT_MAX];
}; };
struct kvm_vcpu_sbi_return { struct kvm_vcpu_sbi_return {
...@@ -45,7 +46,12 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, ...@@ -45,7 +46,12 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
struct kvm_run *run, struct kvm_run *run,
u32 type, u64 flags); u32 type, u64 flags);
int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid); int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg);
int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg);
const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(
struct kvm_vcpu *vcpu, unsigned long extid);
int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run);
#ifdef CONFIG_RISCV_SBI_V01 #ifdef CONFIG_RISCV_SBI_V01
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#include <linux/types.h> #include <linux/types.h>
#include <asm/bitsperlong.h>
#include <asm/ptrace.h> #include <asm/ptrace.h>
#define __KVM_HAVE_READONLY_MEM #define __KVM_HAVE_READONLY_MEM
...@@ -65,7 +66,7 @@ struct kvm_riscv_core { ...@@ -65,7 +66,7 @@ struct kvm_riscv_core {
#define KVM_RISCV_MODE_S 1 #define KVM_RISCV_MODE_S 1
#define KVM_RISCV_MODE_U 0 #define KVM_RISCV_MODE_U 0
/* CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ /* General CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
struct kvm_riscv_csr { struct kvm_riscv_csr {
unsigned long sstatus; unsigned long sstatus;
unsigned long sie; unsigned long sie;
...@@ -79,6 +80,17 @@ struct kvm_riscv_csr { ...@@ -79,6 +80,17 @@ struct kvm_riscv_csr {
unsigned long scounteren; unsigned long scounteren;
}; };
/* AIA CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
struct kvm_riscv_aia_csr {
unsigned long siselect;
unsigned long iprio1;
unsigned long iprio2;
unsigned long sieh;
unsigned long siph;
unsigned long iprio1h;
unsigned long iprio2h;
};
/* TIMER registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ /* TIMER registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
struct kvm_riscv_timer { struct kvm_riscv_timer {
__u64 frequency; __u64 frequency;
...@@ -107,9 +119,28 @@ enum KVM_RISCV_ISA_EXT_ID { ...@@ -107,9 +119,28 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_ZIHINTPAUSE, KVM_RISCV_ISA_EXT_ZIHINTPAUSE,
KVM_RISCV_ISA_EXT_ZICBOM, KVM_RISCV_ISA_EXT_ZICBOM,
KVM_RISCV_ISA_EXT_ZICBOZ, KVM_RISCV_ISA_EXT_ZICBOZ,
KVM_RISCV_ISA_EXT_ZBB,
KVM_RISCV_ISA_EXT_SSAIA,
KVM_RISCV_ISA_EXT_MAX, KVM_RISCV_ISA_EXT_MAX,
}; };
/*
* SBI extension IDs specific to KVM. This is not the same as the SBI
* extension IDs defined by the RISC-V SBI specification.
*/
enum KVM_RISCV_SBI_EXT_ID {
KVM_RISCV_SBI_EXT_V01 = 0,
KVM_RISCV_SBI_EXT_TIME,
KVM_RISCV_SBI_EXT_IPI,
KVM_RISCV_SBI_EXT_RFENCE,
KVM_RISCV_SBI_EXT_SRST,
KVM_RISCV_SBI_EXT_HSM,
KVM_RISCV_SBI_EXT_PMU,
KVM_RISCV_SBI_EXT_EXPERIMENTAL,
KVM_RISCV_SBI_EXT_VENDOR,
KVM_RISCV_SBI_EXT_MAX,
};
/* Possible states for kvm_riscv_timer */ /* Possible states for kvm_riscv_timer */
#define KVM_RISCV_TIMER_STATE_OFF 0 #define KVM_RISCV_TIMER_STATE_OFF 0
#define KVM_RISCV_TIMER_STATE_ON 1 #define KVM_RISCV_TIMER_STATE_ON 1
...@@ -120,6 +151,8 @@ enum KVM_RISCV_ISA_EXT_ID { ...@@ -120,6 +151,8 @@ enum KVM_RISCV_ISA_EXT_ID {
/* If you need to interpret the index values, here is the key: */ /* If you need to interpret the index values, here is the key: */
#define KVM_REG_RISCV_TYPE_MASK 0x00000000FF000000 #define KVM_REG_RISCV_TYPE_MASK 0x00000000FF000000
#define KVM_REG_RISCV_TYPE_SHIFT 24 #define KVM_REG_RISCV_TYPE_SHIFT 24
#define KVM_REG_RISCV_SUBTYPE_MASK 0x0000000000FF0000
#define KVM_REG_RISCV_SUBTYPE_SHIFT 16
/* Config registers are mapped as type 1 */ /* Config registers are mapped as type 1 */
#define KVM_REG_RISCV_CONFIG (0x01 << KVM_REG_RISCV_TYPE_SHIFT) #define KVM_REG_RISCV_CONFIG (0x01 << KVM_REG_RISCV_TYPE_SHIFT)
...@@ -133,8 +166,12 @@ enum KVM_RISCV_ISA_EXT_ID { ...@@ -133,8 +166,12 @@ enum KVM_RISCV_ISA_EXT_ID {
/* Control and status registers are mapped as type 3 */ /* Control and status registers are mapped as type 3 */
#define KVM_REG_RISCV_CSR (0x03 << KVM_REG_RISCV_TYPE_SHIFT) #define KVM_REG_RISCV_CSR (0x03 << KVM_REG_RISCV_TYPE_SHIFT)
#define KVM_REG_RISCV_CSR_GENERAL (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT)
#define KVM_REG_RISCV_CSR_AIA (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT)
#define KVM_REG_RISCV_CSR_REG(name) \ #define KVM_REG_RISCV_CSR_REG(name) \
(offsetof(struct kvm_riscv_csr, name) / sizeof(unsigned long)) (offsetof(struct kvm_riscv_csr, name) / sizeof(unsigned long))
#define KVM_REG_RISCV_CSR_AIA_REG(name) \
(offsetof(struct kvm_riscv_aia_csr, name) / sizeof(unsigned long))
/* Timer registers are mapped as type 4 */ /* Timer registers are mapped as type 4 */
#define KVM_REG_RISCV_TIMER (0x04 << KVM_REG_RISCV_TYPE_SHIFT) #define KVM_REG_RISCV_TIMER (0x04 << KVM_REG_RISCV_TYPE_SHIFT)
...@@ -154,6 +191,18 @@ enum KVM_RISCV_ISA_EXT_ID { ...@@ -154,6 +191,18 @@ enum KVM_RISCV_ISA_EXT_ID {
/* ISA Extension registers are mapped as type 7 */ /* ISA Extension registers are mapped as type 7 */
#define KVM_REG_RISCV_ISA_EXT (0x07 << KVM_REG_RISCV_TYPE_SHIFT) #define KVM_REG_RISCV_ISA_EXT (0x07 << KVM_REG_RISCV_TYPE_SHIFT)
/* SBI extension registers are mapped as type 8 */
#define KVM_REG_RISCV_SBI_EXT (0x08 << KVM_REG_RISCV_TYPE_SHIFT)
#define KVM_REG_RISCV_SBI_SINGLE (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT)
#define KVM_REG_RISCV_SBI_MULTI_EN (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT)
#define KVM_REG_RISCV_SBI_MULTI_DIS (0x2 << KVM_REG_RISCV_SUBTYPE_SHIFT)
#define KVM_REG_RISCV_SBI_MULTI_REG(__ext_id) \
((__ext_id) / __BITS_PER_LONG)
#define KVM_REG_RISCV_SBI_MULTI_MASK(__ext_id) \
(1UL << ((__ext_id) % __BITS_PER_LONG))
#define KVM_REG_RISCV_SBI_MULTI_REG_LAST \
KVM_REG_RISCV_SBI_MULTI_REG(KVM_RISCV_SBI_EXT_MAX - 1)
#endif #endif
#endif /* __LINUX_KVM_RISCV_H */ #endif /* __LINUX_KVM_RISCV_H */
...@@ -185,6 +185,8 @@ static struct riscv_isa_ext_data isa_ext_arr[] = { ...@@ -185,6 +185,8 @@ static struct riscv_isa_ext_data isa_ext_arr[] = {
__RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ),
__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
__RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB), __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
__RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA),
__RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA),
__RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
__RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC), __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
......
...@@ -228,6 +228,8 @@ void __init riscv_fill_hwcap(void) ...@@ -228,6 +228,8 @@ void __init riscv_fill_hwcap(void)
} }
} else { } else {
/* sorted alphabetically */ /* sorted alphabetically */
SET_ISA_EXT_MAP("smaia", RISCV_ISA_EXT_SMAIA);
SET_ISA_EXT_MAP("ssaia", RISCV_ISA_EXT_SSAIA);
SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF); SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC); SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL); SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL);
......
...@@ -20,14 +20,14 @@ if VIRTUALIZATION ...@@ -20,14 +20,14 @@ if VIRTUALIZATION
config KVM config KVM
tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)" tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
depends on RISCV_SBI && MMU depends on RISCV_SBI && MMU
select HAVE_KVM_EVENTFD
select HAVE_KVM_VCPU_ASYNC_IOCTL
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_GENERIC_HARDWARE_ENABLING select KVM_GENERIC_HARDWARE_ENABLING
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select KVM_MMIO select KVM_MMIO
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_XFER_TO_GUEST_WORK select KVM_XFER_TO_GUEST_WORK
select HAVE_KVM_VCPU_ASYNC_IOCTL select MMU_NOTIFIER
select HAVE_KVM_EVENTFD select PREEMPT_NOTIFIERS
help help
Support hosting virtualized guest machines. Support hosting virtualized guest machines.
......
...@@ -26,3 +26,4 @@ kvm-y += vcpu_sbi_replace.o ...@@ -26,3 +26,4 @@ kvm-y += vcpu_sbi_replace.o
kvm-y += vcpu_sbi_hsm.o kvm-y += vcpu_sbi_hsm.o
kvm-y += vcpu_timer.o kvm-y += vcpu_timer.o
kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o
kvm-y += aia.o
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2021 Western Digital Corporation or its affiliates.
* Copyright (C) 2022 Ventana Micro Systems Inc.
*
* Authors:
* Anup Patel <apatel@ventanamicro.com>
*/
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <asm/hwcap.h>
DEFINE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
static void aia_set_hvictl(bool ext_irq_pending)
{
unsigned long hvictl;
/*
* HVICTL.IID == 9 and HVICTL.IPRIO == 0 represents
* no interrupt in HVICTL.
*/
hvictl = (IRQ_S_EXT << HVICTL_IID_SHIFT) & HVICTL_IID;
hvictl |= ext_irq_pending;
csr_write(CSR_HVICTL, hvictl);
}
#ifdef CONFIG_32BIT
void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
unsigned long mask, val;
if (!kvm_riscv_aia_available())
return;
if (READ_ONCE(vcpu->arch.irqs_pending_mask[1])) {
mask = xchg_acquire(&vcpu->arch.irqs_pending_mask[1], 0);
val = READ_ONCE(vcpu->arch.irqs_pending[1]) & mask;
csr->hviph &= ~mask;
csr->hviph |= val;
}
}
void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
if (kvm_riscv_aia_available())
csr->vsieh = csr_read(CSR_VSIEH);
}
#endif
bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
{
unsigned long seip;
if (!kvm_riscv_aia_available())
return false;
#ifdef CONFIG_32BIT
if (READ_ONCE(vcpu->arch.irqs_pending[1]) &
(vcpu->arch.aia_context.guest_csr.vsieh & upper_32_bits(mask)))
return true;
#endif
seip = vcpu->arch.guest_csr.vsie;
seip &= (unsigned long)mask;
seip &= BIT(IRQ_S_EXT);
if (!kvm_riscv_aia_initialized(vcpu->kvm) || !seip)
return false;
return false;
}
void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
if (!kvm_riscv_aia_available())
return;
#ifdef CONFIG_32BIT
csr_write(CSR_HVIPH, vcpu->arch.aia_context.guest_csr.hviph);
#endif
aia_set_hvictl(!!(csr->hvip & BIT(IRQ_VS_EXT)));
}
void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu)
{
struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
if (!kvm_riscv_aia_available())
return;
csr_write(CSR_VSISELECT, csr->vsiselect);
csr_write(CSR_HVIPRIO1, csr->hviprio1);
csr_write(CSR_HVIPRIO2, csr->hviprio2);
#ifdef CONFIG_32BIT
csr_write(CSR_VSIEH, csr->vsieh);
csr_write(CSR_HVIPH, csr->hviph);
csr_write(CSR_HVIPRIO1H, csr->hviprio1h);
csr_write(CSR_HVIPRIO2H, csr->hviprio2h);
#endif
}
void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
if (!kvm_riscv_aia_available())
return;
csr->vsiselect = csr_read(CSR_VSISELECT);
csr->hviprio1 = csr_read(CSR_HVIPRIO1);
csr->hviprio2 = csr_read(CSR_HVIPRIO2);
#ifdef CONFIG_32BIT
csr->vsieh = csr_read(CSR_VSIEH);
csr->hviph = csr_read(CSR_HVIPH);
csr->hviprio1h = csr_read(CSR_HVIPRIO1H);
csr->hviprio2h = csr_read(CSR_HVIPRIO2H);
#endif
}
int kvm_riscv_vcpu_aia_get_csr(struct kvm_vcpu *vcpu,
unsigned long reg_num,
unsigned long *out_val)
{
struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long))
return -EINVAL;
*out_val = 0;
if (kvm_riscv_aia_available())
*out_val = ((unsigned long *)csr)[reg_num];
return 0;
}
int kvm_riscv_vcpu_aia_set_csr(struct kvm_vcpu *vcpu,
unsigned long reg_num,
unsigned long val)
{
struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long))
return -EINVAL;
if (kvm_riscv_aia_available()) {
((unsigned long *)csr)[reg_num] = val;
#ifdef CONFIG_32BIT
if (reg_num == KVM_REG_RISCV_CSR_AIA_REG(siph))
WRITE_ONCE(vcpu->arch.irqs_pending_mask[1], 0);
#endif
}
return 0;
}
int kvm_riscv_vcpu_aia_rmw_topei(struct kvm_vcpu *vcpu,
unsigned int csr_num,
unsigned long *val,
unsigned long new_val,
unsigned long wr_mask)
{
/* If AIA not available then redirect trap */
if (!kvm_riscv_aia_available())
return KVM_INSN_ILLEGAL_TRAP;
/* If AIA not initialized then forward to user space */
if (!kvm_riscv_aia_initialized(vcpu->kvm))
return KVM_INSN_EXIT_TO_USER_SPACE;
return kvm_riscv_vcpu_aia_imsic_rmw(vcpu, KVM_RISCV_AIA_IMSIC_TOPEI,
val, new_val, wr_mask);
}
/*
* External IRQ priority always read-only zero. This means default
* priority order is always preferred for external IRQs unless
* HVICTL.IID == 9 and HVICTL.IPRIO != 0
*/
static int aia_irq2bitpos[] = {
0, 8, -1, -1, 16, 24, -1, -1, /* 0 - 7 */
32, -1, -1, -1, -1, 40, 48, 56, /* 8 - 15 */
64, 72, 80, 88, 96, 104, 112, 120, /* 16 - 23 */
-1, -1, -1, -1, -1, -1, -1, -1, /* 24 - 31 */
-1, -1, -1, -1, -1, -1, -1, -1, /* 32 - 39 */
-1, -1, -1, -1, -1, -1, -1, -1, /* 40 - 47 */
-1, -1, -1, -1, -1, -1, -1, -1, /* 48 - 55 */
-1, -1, -1, -1, -1, -1, -1, -1, /* 56 - 63 */
};
static u8 aia_get_iprio8(struct kvm_vcpu *vcpu, unsigned int irq)
{
unsigned long hviprio;
int bitpos = aia_irq2bitpos[irq];
if (bitpos < 0)
return 0;
switch (bitpos / BITS_PER_LONG) {
case 0:
hviprio = csr_read(CSR_HVIPRIO1);
break;
case 1:
#ifndef CONFIG_32BIT
hviprio = csr_read(CSR_HVIPRIO2);
break;
#else
hviprio = csr_read(CSR_HVIPRIO1H);
break;
case 2:
hviprio = csr_read(CSR_HVIPRIO2);
break;
case 3:
hviprio = csr_read(CSR_HVIPRIO2H);
break;
#endif
default:
return 0;
}
return (hviprio >> (bitpos % BITS_PER_LONG)) & TOPI_IPRIO_MASK;
}
static void aia_set_iprio8(struct kvm_vcpu *vcpu, unsigned int irq, u8 prio)
{
unsigned long hviprio;
int bitpos = aia_irq2bitpos[irq];
if (bitpos < 0)
return;
switch (bitpos / BITS_PER_LONG) {
case 0:
hviprio = csr_read(CSR_HVIPRIO1);
break;
case 1:
#ifndef CONFIG_32BIT
hviprio = csr_read(CSR_HVIPRIO2);
break;
#else
hviprio = csr_read(CSR_HVIPRIO1H);
break;
case 2:
hviprio = csr_read(CSR_HVIPRIO2);
break;
case 3:
hviprio = csr_read(CSR_HVIPRIO2H);
break;
#endif
default:
return;
}
hviprio &= ~(TOPI_IPRIO_MASK << (bitpos % BITS_PER_LONG));
hviprio |= (unsigned long)prio << (bitpos % BITS_PER_LONG);
switch (bitpos / BITS_PER_LONG) {
case 0:
csr_write(CSR_HVIPRIO1, hviprio);
break;
case 1:
#ifndef CONFIG_32BIT
csr_write(CSR_HVIPRIO2, hviprio);
break;
#else
csr_write(CSR_HVIPRIO1H, hviprio);
break;
case 2:
csr_write(CSR_HVIPRIO2, hviprio);
break;
case 3:
csr_write(CSR_HVIPRIO2H, hviprio);
break;
#endif
default:
return;
}
}
static int aia_rmw_iprio(struct kvm_vcpu *vcpu, unsigned int isel,
unsigned long *val, unsigned long new_val,
unsigned long wr_mask)
{
int i, first_irq, nirqs;
unsigned long old_val;
u8 prio;
#ifndef CONFIG_32BIT
if (isel & 0x1)
return KVM_INSN_ILLEGAL_TRAP;
#endif
nirqs = 4 * (BITS_PER_LONG / 32);
first_irq = (isel - ISELECT_IPRIO0) * 4;
old_val = 0;
for (i = 0; i < nirqs; i++) {
prio = aia_get_iprio8(vcpu, first_irq + i);
old_val |= (unsigned long)prio << (TOPI_IPRIO_BITS * i);
}
if (val)
*val = old_val;
if (wr_mask) {
new_val = (old_val & ~wr_mask) | (new_val & wr_mask);
for (i = 0; i < nirqs; i++) {
prio = (new_val >> (TOPI_IPRIO_BITS * i)) &
TOPI_IPRIO_MASK;
aia_set_iprio8(vcpu, first_irq + i, prio);
}
}
return KVM_INSN_CONTINUE_NEXT_SEPC;
}
#define IMSIC_FIRST 0x70
#define IMSIC_LAST 0xff
int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num,
unsigned long *val, unsigned long new_val,
unsigned long wr_mask)
{
unsigned int isel;
/* If AIA not available then redirect trap */
if (!kvm_riscv_aia_available())
return KVM_INSN_ILLEGAL_TRAP;
/* First try to emulate in kernel space */
isel = csr_read(CSR_VSISELECT) & ISELECT_MASK;
if (isel >= ISELECT_IPRIO0 && isel <= ISELECT_IPRIO15)
return aia_rmw_iprio(vcpu, isel, val, new_val, wr_mask);
else if (isel >= IMSIC_FIRST && isel <= IMSIC_LAST &&
kvm_riscv_aia_initialized(vcpu->kvm))
return kvm_riscv_vcpu_aia_imsic_rmw(vcpu, isel, val, new_val,
wr_mask);
/* We can't handle it here so redirect to user space */
return KVM_INSN_EXIT_TO_USER_SPACE;
}
void kvm_riscv_aia_enable(void)
{
if (!kvm_riscv_aia_available())
return;
aia_set_hvictl(false);
csr_write(CSR_HVIPRIO1, 0x0);
csr_write(CSR_HVIPRIO2, 0x0);
#ifdef CONFIG_32BIT
csr_write(CSR_HVIPH, 0x0);
csr_write(CSR_HIDELEGH, 0x0);
csr_write(CSR_HVIPRIO1H, 0x0);
csr_write(CSR_HVIPRIO2H, 0x0);
#endif
}
void kvm_riscv_aia_disable(void)
{
if (!kvm_riscv_aia_available())
return;
aia_set_hvictl(false);
}
int kvm_riscv_aia_init(void)
{
if (!riscv_isa_extension_available(NULL, SxAIA))
return -ENODEV;
/* Enable KVM AIA support */
static_branch_enable(&kvm_riscv_aia_available);
return 0;
}
void kvm_riscv_aia_exit(void)
{
}
...@@ -44,11 +44,15 @@ int kvm_arch_hardware_enable(void) ...@@ -44,11 +44,15 @@ int kvm_arch_hardware_enable(void)
csr_write(CSR_HVIP, 0); csr_write(CSR_HVIP, 0);
kvm_riscv_aia_enable();
return 0; return 0;
} }
void kvm_arch_hardware_disable(void) void kvm_arch_hardware_disable(void)
{ {
kvm_riscv_aia_disable();
/* /*
* After clearing the hideleg CSR, the host kernel will receive * After clearing the hideleg CSR, the host kernel will receive
* spurious interrupts if hvip CSR has pending interrupts and the * spurious interrupts if hvip CSR has pending interrupts and the
...@@ -63,6 +67,7 @@ void kvm_arch_hardware_disable(void) ...@@ -63,6 +67,7 @@ void kvm_arch_hardware_disable(void)
static int __init riscv_kvm_init(void) static int __init riscv_kvm_init(void)
{ {
int rc;
const char *str; const char *str;
if (!riscv_isa_extension_available(NULL, h)) { if (!riscv_isa_extension_available(NULL, h)) {
...@@ -84,6 +89,10 @@ static int __init riscv_kvm_init(void) ...@@ -84,6 +89,10 @@ static int __init riscv_kvm_init(void)
kvm_riscv_gstage_vmid_detect(); kvm_riscv_gstage_vmid_detect();
rc = kvm_riscv_aia_init();
if (rc && rc != -ENODEV)
return rc;
kvm_info("hypervisor extension available\n"); kvm_info("hypervisor extension available\n");
switch (kvm_riscv_gstage_mode()) { switch (kvm_riscv_gstage_mode()) {
...@@ -106,12 +115,23 @@ static int __init riscv_kvm_init(void) ...@@ -106,12 +115,23 @@ static int __init riscv_kvm_init(void)
kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits()); kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits());
return kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); if (kvm_riscv_aia_available())
kvm_info("AIA available\n");
rc = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
if (rc) {
kvm_riscv_aia_exit();
return rc;
}
return 0;
} }
module_init(riscv_kvm_init); module_init(riscv_kvm_init);
static void __exit riscv_kvm_exit(void) static void __exit riscv_kvm_exit(void)
{ {
kvm_riscv_aia_exit();
kvm_exit(); kvm_exit();
} }
module_exit(riscv_kvm_exit); module_exit(riscv_kvm_exit);
...@@ -628,6 +628,13 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, ...@@ -628,6 +628,13 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
!(memslot->flags & KVM_MEM_READONLY)) ? true : false; !(memslot->flags & KVM_MEM_READONLY)) ? true : false;
unsigned long vma_pagesize, mmu_seq; unsigned long vma_pagesize, mmu_seq;
/* We need minimum second+third level pages */
ret = kvm_mmu_topup_memory_cache(pcache, gstage_pgd_levels);
if (ret) {
kvm_err("Failed to topup G-stage cache\n");
return ret;
}
mmap_read_lock(current->mm); mmap_read_lock(current->mm);
vma = vma_lookup(current->mm, hva); vma = vma_lookup(current->mm, hva);
...@@ -648,6 +655,15 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, ...@@ -648,6 +655,15 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE)
gfn = (gpa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT; gfn = (gpa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT;
/*
* Read mmu_invalidate_seq so that KVM can detect if the results of
* vma_lookup() or gfn_to_pfn_prot() become stale priort to acquiring
* kvm->mmu_lock.
*
* Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
* with the smp_wmb() in kvm_mmu_invalidate_end().
*/
mmu_seq = kvm->mmu_invalidate_seq;
mmap_read_unlock(current->mm); mmap_read_unlock(current->mm);
if (vma_pagesize != PUD_SIZE && if (vma_pagesize != PUD_SIZE &&
...@@ -657,15 +673,6 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, ...@@ -657,15 +673,6 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
return -EFAULT; return -EFAULT;
} }
/* We need minimum second+third level pages */
ret = kvm_mmu_topup_memory_cache(pcache, gstage_pgd_levels);
if (ret) {
kvm_err("Failed to topup G-stage cache\n");
return ret;
}
mmu_seq = kvm->mmu_invalidate_seq;
hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writable); hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writable);
if (hfn == KVM_PFN_ERR_HWPOISON) { if (hfn == KVM_PFN_ERR_HWPOISON) {
send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva, send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva,
...@@ -748,8 +755,7 @@ void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu) ...@@ -748,8 +755,7 @@ void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu)
unsigned long hgatp = gstage_mode; unsigned long hgatp = gstage_mode;
struct kvm_arch *k = &vcpu->kvm->arch; struct kvm_arch *k = &vcpu->kvm->arch;
hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID;
HGATP_VMID_MASK;
hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN; hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN;
csr_write(CSR_HGATP, hgatp); csr_write(CSR_HGATP, hgatp);
......
...@@ -58,9 +58,11 @@ static const unsigned long kvm_isa_ext_arr[] = { ...@@ -58,9 +58,11 @@ static const unsigned long kvm_isa_ext_arr[] = {
[KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i, [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i,
[KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m, [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
KVM_ISA_EXT_ARR(SSAIA),
KVM_ISA_EXT_ARR(SSTC), KVM_ISA_EXT_ARR(SSTC),
KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVINVAL),
KVM_ISA_EXT_ARR(SVPBMT), KVM_ISA_EXT_ARR(SVPBMT),
KVM_ISA_EXT_ARR(ZBB),
KVM_ISA_EXT_ARR(ZIHINTPAUSE), KVM_ISA_EXT_ARR(ZIHINTPAUSE),
KVM_ISA_EXT_ARR(ZICBOM), KVM_ISA_EXT_ARR(ZICBOM),
KVM_ISA_EXT_ARR(ZICBOZ), KVM_ISA_EXT_ARR(ZICBOZ),
...@@ -97,9 +99,11 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) ...@@ -97,9 +99,11 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
case KVM_RISCV_ISA_EXT_C: case KVM_RISCV_ISA_EXT_C:
case KVM_RISCV_ISA_EXT_I: case KVM_RISCV_ISA_EXT_I:
case KVM_RISCV_ISA_EXT_M: case KVM_RISCV_ISA_EXT_M:
case KVM_RISCV_ISA_EXT_SSAIA:
case KVM_RISCV_ISA_EXT_SSTC: case KVM_RISCV_ISA_EXT_SSTC:
case KVM_RISCV_ISA_EXT_SVINVAL: case KVM_RISCV_ISA_EXT_SVINVAL:
case KVM_RISCV_ISA_EXT_ZIHINTPAUSE: case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
case KVM_RISCV_ISA_EXT_ZBB:
return false; return false;
default: default:
break; break;
...@@ -136,8 +140,10 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) ...@@ -136,8 +140,10 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
kvm_riscv_vcpu_timer_reset(vcpu); kvm_riscv_vcpu_timer_reset(vcpu);
WRITE_ONCE(vcpu->arch.irqs_pending, 0); kvm_riscv_vcpu_aia_reset(vcpu);
WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
bitmap_zero(vcpu->arch.irqs_pending, KVM_RISCV_VCPU_NR_IRQS);
bitmap_zero(vcpu->arch.irqs_pending_mask, KVM_RISCV_VCPU_NR_IRQS);
kvm_riscv_vcpu_pmu_reset(vcpu); kvm_riscv_vcpu_pmu_reset(vcpu);
...@@ -158,6 +164,7 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) ...@@ -158,6 +164,7 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{ {
int rc;
struct kvm_cpu_context *cntx; struct kvm_cpu_context *cntx;
struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr; struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
unsigned long host_isa, i; unsigned long host_isa, i;
...@@ -200,6 +207,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) ...@@ -200,6 +207,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
/* setup performance monitoring */ /* setup performance monitoring */
kvm_riscv_vcpu_pmu_init(vcpu); kvm_riscv_vcpu_pmu_init(vcpu);
/* Setup VCPU AIA */
rc = kvm_riscv_vcpu_aia_init(vcpu);
if (rc)
return rc;
/* Reset VCPU */ /* Reset VCPU */
kvm_riscv_reset_vcpu(vcpu); kvm_riscv_reset_vcpu(vcpu);
...@@ -219,6 +231,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) ...@@ -219,6 +231,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{ {
/* Cleanup VCPU AIA context */
kvm_riscv_vcpu_aia_deinit(vcpu);
/* Cleanup VCPU timer */ /* Cleanup VCPU timer */
kvm_riscv_vcpu_timer_deinit(vcpu); kvm_riscv_vcpu_timer_deinit(vcpu);
...@@ -455,27 +470,76 @@ static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu, ...@@ -455,27 +470,76 @@ static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu,
return 0; return 0;
} }
static int kvm_riscv_vcpu_general_get_csr(struct kvm_vcpu *vcpu,
unsigned long reg_num,
unsigned long *out_val)
{
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
return -EINVAL;
if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
kvm_riscv_vcpu_flush_interrupts(vcpu);
*out_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK;
*out_val |= csr->hvip & ~IRQ_LOCAL_MASK;
} else
*out_val = ((unsigned long *)csr)[reg_num];
return 0;
}
static inline int kvm_riscv_vcpu_general_set_csr(struct kvm_vcpu *vcpu,
unsigned long reg_num,
unsigned long reg_val)
{
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
return -EINVAL;
if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
reg_val &= VSIP_VALID_MASK;
reg_val <<= VSIP_TO_HVIP_SHIFT;
}
((unsigned long *)csr)[reg_num] = reg_val;
if (reg_num == KVM_REG_RISCV_CSR_REG(sip))
WRITE_ONCE(vcpu->arch.irqs_pending_mask[0], 0);
return 0;
}
static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu, static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg) const struct kvm_one_reg *reg)
{ {
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; int rc;
unsigned long __user *uaddr = unsigned long __user *uaddr =
(unsigned long __user *)(unsigned long)reg->addr; (unsigned long __user *)(unsigned long)reg->addr;
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
KVM_REG_SIZE_MASK | KVM_REG_SIZE_MASK |
KVM_REG_RISCV_CSR); KVM_REG_RISCV_CSR);
unsigned long reg_val; unsigned long reg_val, reg_subtype;
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
return -EINVAL; return -EINVAL;
if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
return -EINVAL;
if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
kvm_riscv_vcpu_flush_interrupts(vcpu); reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
reg_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK; switch (reg_subtype) {
} else case KVM_REG_RISCV_CSR_GENERAL:
reg_val = ((unsigned long *)csr)[reg_num]; rc = kvm_riscv_vcpu_general_get_csr(vcpu, reg_num, &reg_val);
break;
case KVM_REG_RISCV_CSR_AIA:
rc = kvm_riscv_vcpu_aia_get_csr(vcpu, reg_num, &reg_val);
break;
default:
rc = -EINVAL;
break;
}
if (rc)
return rc;
if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id))) if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
return -EFAULT; return -EFAULT;
...@@ -486,31 +550,35 @@ static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu, ...@@ -486,31 +550,35 @@ static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu,
static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu, static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg) const struct kvm_one_reg *reg)
{ {
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; int rc;
unsigned long __user *uaddr = unsigned long __user *uaddr =
(unsigned long __user *)(unsigned long)reg->addr; (unsigned long __user *)(unsigned long)reg->addr;
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
KVM_REG_SIZE_MASK | KVM_REG_SIZE_MASK |
KVM_REG_RISCV_CSR); KVM_REG_RISCV_CSR);
unsigned long reg_val; unsigned long reg_val, reg_subtype;
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
return -EINVAL; return -EINVAL;
if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
return -EINVAL;
if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id))) if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
return -EFAULT; return -EFAULT;
if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
reg_val &= VSIP_VALID_MASK; reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
reg_val <<= VSIP_TO_HVIP_SHIFT; switch (reg_subtype) {
case KVM_REG_RISCV_CSR_GENERAL:
rc = kvm_riscv_vcpu_general_set_csr(vcpu, reg_num, reg_val);
break;
case KVM_REG_RISCV_CSR_AIA:
rc = kvm_riscv_vcpu_aia_set_csr(vcpu, reg_num, reg_val);
break;
default:
rc = -EINVAL;
break;
} }
if (rc)
((unsigned long *)csr)[reg_num] = reg_val; return rc;
if (reg_num == KVM_REG_RISCV_CSR_REG(sip))
WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
return 0; return 0;
} }
...@@ -609,6 +677,8 @@ static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu, ...@@ -609,6 +677,8 @@ static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
KVM_REG_RISCV_FP_D); KVM_REG_RISCV_FP_D);
case KVM_REG_RISCV_ISA_EXT: case KVM_REG_RISCV_ISA_EXT:
return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg); return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
case KVM_REG_RISCV_SBI_EXT:
return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg);
default: default:
break; break;
} }
...@@ -636,6 +706,8 @@ static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu, ...@@ -636,6 +706,8 @@ static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
KVM_REG_RISCV_FP_D); KVM_REG_RISCV_FP_D);
case KVM_REG_RISCV_ISA_EXT: case KVM_REG_RISCV_ISA_EXT:
return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg); return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
case KVM_REG_RISCV_SBI_EXT:
return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg);
default: default:
break; break;
} }
...@@ -736,13 +808,16 @@ void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu) ...@@ -736,13 +808,16 @@ void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu)
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
unsigned long mask, val; unsigned long mask, val;
if (READ_ONCE(vcpu->arch.irqs_pending_mask)) { if (READ_ONCE(vcpu->arch.irqs_pending_mask[0])) {
mask = xchg_acquire(&vcpu->arch.irqs_pending_mask, 0); mask = xchg_acquire(&vcpu->arch.irqs_pending_mask[0], 0);
val = READ_ONCE(vcpu->arch.irqs_pending) & mask; val = READ_ONCE(vcpu->arch.irqs_pending[0]) & mask;
csr->hvip &= ~mask; csr->hvip &= ~mask;
csr->hvip |= val; csr->hvip |= val;
} }
/* Flush AIA high interrupts */
kvm_riscv_vcpu_aia_flush_interrupts(vcpu);
} }
void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu) void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
...@@ -759,29 +834,38 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu) ...@@ -759,29 +834,38 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) { if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) {
if (hvip & (1UL << IRQ_VS_SOFT)) { if (hvip & (1UL << IRQ_VS_SOFT)) {
if (!test_and_set_bit(IRQ_VS_SOFT, if (!test_and_set_bit(IRQ_VS_SOFT,
&v->irqs_pending_mask)) v->irqs_pending_mask))
set_bit(IRQ_VS_SOFT, &v->irqs_pending); set_bit(IRQ_VS_SOFT, v->irqs_pending);
} else { } else {
if (!test_and_set_bit(IRQ_VS_SOFT, if (!test_and_set_bit(IRQ_VS_SOFT,
&v->irqs_pending_mask)) v->irqs_pending_mask))
clear_bit(IRQ_VS_SOFT, &v->irqs_pending); clear_bit(IRQ_VS_SOFT, v->irqs_pending);
} }
} }
/* Sync-up AIA high interrupts */
kvm_riscv_vcpu_aia_sync_interrupts(vcpu);
/* Sync-up timer CSRs */ /* Sync-up timer CSRs */
kvm_riscv_vcpu_timer_sync(vcpu); kvm_riscv_vcpu_timer_sync(vcpu);
} }
int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{ {
if (irq != IRQ_VS_SOFT && /*
* We only allow VS-mode software, timer, and external
* interrupts when irq is one of the local interrupts
* defined by RISC-V privilege specification.
*/
if (irq < IRQ_LOCAL_MAX &&
irq != IRQ_VS_SOFT &&
irq != IRQ_VS_TIMER && irq != IRQ_VS_TIMER &&
irq != IRQ_VS_EXT) irq != IRQ_VS_EXT)
return -EINVAL; return -EINVAL;
set_bit(irq, &vcpu->arch.irqs_pending); set_bit(irq, vcpu->arch.irqs_pending);
smp_mb__before_atomic(); smp_mb__before_atomic();
set_bit(irq, &vcpu->arch.irqs_pending_mask); set_bit(irq, vcpu->arch.irqs_pending_mask);
kvm_vcpu_kick(vcpu); kvm_vcpu_kick(vcpu);
...@@ -790,24 +874,37 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) ...@@ -790,24 +874,37 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{ {
if (irq != IRQ_VS_SOFT && /*
* We only allow VS-mode software, timer, and external
* interrupts when irq is one of the local interrupts
* defined by RISC-V privilege specification.
*/
if (irq < IRQ_LOCAL_MAX &&
irq != IRQ_VS_SOFT &&
irq != IRQ_VS_TIMER && irq != IRQ_VS_TIMER &&
irq != IRQ_VS_EXT) irq != IRQ_VS_EXT)
return -EINVAL; return -EINVAL;
clear_bit(irq, &vcpu->arch.irqs_pending); clear_bit(irq, vcpu->arch.irqs_pending);
smp_mb__before_atomic(); smp_mb__before_atomic();
set_bit(irq, &vcpu->arch.irqs_pending_mask); set_bit(irq, vcpu->arch.irqs_pending_mask);
return 0; return 0;
} }
bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask) bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
{ {
unsigned long ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK) unsigned long ie;
<< VSIP_TO_HVIP_SHIFT) & mask;
ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK)
<< VSIP_TO_HVIP_SHIFT) & (unsigned long)mask;
ie |= vcpu->arch.guest_csr.vsie & ~IRQ_LOCAL_MASK &
(unsigned long)mask;
if (READ_ONCE(vcpu->arch.irqs_pending[0]) & ie)
return true;
return (READ_ONCE(vcpu->arch.irqs_pending) & ie) ? true : false; /* Check AIA high interrupts */
return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask);
} }
void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
...@@ -906,6 +1003,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ...@@ -906,6 +1003,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context, kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context,
vcpu->arch.isa); vcpu->arch.isa);
kvm_riscv_vcpu_aia_load(vcpu, cpu);
vcpu->cpu = cpu; vcpu->cpu = cpu;
} }
...@@ -915,6 +1014,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) ...@@ -915,6 +1014,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
vcpu->cpu = -1; vcpu->cpu = -1;
kvm_riscv_vcpu_aia_put(vcpu);
kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context, kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context,
vcpu->arch.isa); vcpu->arch.isa);
kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context); kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);
...@@ -982,6 +1083,7 @@ static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu) ...@@ -982,6 +1083,7 @@ static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
csr_write(CSR_HVIP, csr->hvip); csr_write(CSR_HVIP, csr->hvip);
kvm_riscv_vcpu_aia_update_hvip(vcpu);
} }
/* /*
...@@ -1054,6 +1156,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ...@@ -1054,6 +1156,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_riscv_check_vcpu_requests(vcpu); kvm_riscv_check_vcpu_requests(vcpu);
preempt_disable();
/* Update AIA HW state before entering guest */
ret = kvm_riscv_vcpu_aia_update(vcpu);
if (ret <= 0) {
preempt_enable();
continue;
}
local_irq_disable(); local_irq_disable();
/* /*
...@@ -1082,6 +1193,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ...@@ -1082,6 +1193,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
xfer_to_guest_mode_work_pending()) { xfer_to_guest_mode_work_pending()) {
vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->mode = OUTSIDE_GUEST_MODE;
local_irq_enable(); local_irq_enable();
preempt_enable();
kvm_vcpu_srcu_read_lock(vcpu); kvm_vcpu_srcu_read_lock(vcpu);
continue; continue;
} }
...@@ -1115,8 +1227,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ...@@ -1115,8 +1227,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
/* Syncup interrupts state with HW */ /* Syncup interrupts state with HW */
kvm_riscv_vcpu_sync_interrupts(vcpu); kvm_riscv_vcpu_sync_interrupts(vcpu);
preempt_disable();
/* /*
* We must ensure that any pending interrupts are taken before * We must ensure that any pending interrupts are taken before
* we exit guest timing so that timer ticks are accounted as * we exit guest timing so that timer ticks are accounted as
......
...@@ -214,6 +214,7 @@ struct csr_func { ...@@ -214,6 +214,7 @@ struct csr_func {
}; };
static const struct csr_func csr_funcs[] = { static const struct csr_func csr_funcs[] = {
KVM_RISCV_VCPU_AIA_CSR_FUNCS
KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS
}; };
......
...@@ -30,17 +30,52 @@ static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu = { ...@@ -30,17 +30,52 @@ static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu = {
}; };
#endif #endif
static const struct kvm_vcpu_sbi_extension *sbi_ext[] = { struct kvm_riscv_sbi_extension_entry {
&vcpu_sbi_ext_v01, enum KVM_RISCV_SBI_EXT_ID dis_idx;
&vcpu_sbi_ext_base, const struct kvm_vcpu_sbi_extension *ext_ptr;
&vcpu_sbi_ext_time, };
&vcpu_sbi_ext_ipi,
&vcpu_sbi_ext_rfence, static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = {
&vcpu_sbi_ext_srst, {
&vcpu_sbi_ext_hsm, .dis_idx = KVM_RISCV_SBI_EXT_V01,
&vcpu_sbi_ext_pmu, .ext_ptr = &vcpu_sbi_ext_v01,
&vcpu_sbi_ext_experimental, },
&vcpu_sbi_ext_vendor, {
.dis_idx = KVM_RISCV_SBI_EXT_MAX, /* Can't be disabled */
.ext_ptr = &vcpu_sbi_ext_base,
},
{
.dis_idx = KVM_RISCV_SBI_EXT_TIME,
.ext_ptr = &vcpu_sbi_ext_time,
},
{
.dis_idx = KVM_RISCV_SBI_EXT_IPI,
.ext_ptr = &vcpu_sbi_ext_ipi,
},
{
.dis_idx = KVM_RISCV_SBI_EXT_RFENCE,
.ext_ptr = &vcpu_sbi_ext_rfence,
},
{
.dis_idx = KVM_RISCV_SBI_EXT_SRST,
.ext_ptr = &vcpu_sbi_ext_srst,
},
{
.dis_idx = KVM_RISCV_SBI_EXT_HSM,
.ext_ptr = &vcpu_sbi_ext_hsm,
},
{
.dis_idx = KVM_RISCV_SBI_EXT_PMU,
.ext_ptr = &vcpu_sbi_ext_pmu,
},
{
.dis_idx = KVM_RISCV_SBI_EXT_EXPERIMENTAL,
.ext_ptr = &vcpu_sbi_ext_experimental,
},
{
.dis_idx = KVM_RISCV_SBI_EXT_VENDOR,
.ext_ptr = &vcpu_sbi_ext_vendor,
},
}; };
void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run) void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
...@@ -99,14 +134,192 @@ int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run) ...@@ -99,14 +134,192 @@ int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 0; return 0;
} }
const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid) static int riscv_vcpu_set_sbi_ext_single(struct kvm_vcpu *vcpu,
unsigned long reg_num,
unsigned long reg_val)
{
unsigned long i;
const struct kvm_riscv_sbi_extension_entry *sext = NULL;
struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
if (reg_num >= KVM_RISCV_SBI_EXT_MAX ||
(reg_val != 1 && reg_val != 0))
return -EINVAL;
for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
if (sbi_ext[i].dis_idx == reg_num) {
sext = &sbi_ext[i];
break;
}
}
if (!sext)
return -ENOENT;
scontext->extension_disabled[sext->dis_idx] = !reg_val;
return 0;
}
static int riscv_vcpu_get_sbi_ext_single(struct kvm_vcpu *vcpu,
unsigned long reg_num,
unsigned long *reg_val)
{
unsigned long i;
const struct kvm_riscv_sbi_extension_entry *sext = NULL;
struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
if (reg_num >= KVM_RISCV_SBI_EXT_MAX)
return -EINVAL;
for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
if (sbi_ext[i].dis_idx == reg_num) {
sext = &sbi_ext[i];
break;
}
}
if (!sext)
return -ENOENT;
*reg_val = !scontext->extension_disabled[sext->dis_idx];
return 0;
}
static int riscv_vcpu_set_sbi_ext_multi(struct kvm_vcpu *vcpu,
unsigned long reg_num,
unsigned long reg_val, bool enable)
{
unsigned long i, ext_id;
if (reg_num > KVM_REG_RISCV_SBI_MULTI_REG_LAST)
return -EINVAL;
for_each_set_bit(i, &reg_val, BITS_PER_LONG) {
ext_id = i + reg_num * BITS_PER_LONG;
if (ext_id >= KVM_RISCV_SBI_EXT_MAX)
break;
riscv_vcpu_set_sbi_ext_single(vcpu, ext_id, enable);
}
return 0;
}
static int riscv_vcpu_get_sbi_ext_multi(struct kvm_vcpu *vcpu,
unsigned long reg_num,
unsigned long *reg_val)
{
unsigned long i, ext_id, ext_val;
if (reg_num > KVM_REG_RISCV_SBI_MULTI_REG_LAST)
return -EINVAL;
for (i = 0; i < BITS_PER_LONG; i++) {
ext_id = i + reg_num * BITS_PER_LONG;
if (ext_id >= KVM_RISCV_SBI_EXT_MAX)
break;
ext_val = 0;
riscv_vcpu_get_sbi_ext_single(vcpu, ext_id, &ext_val);
if (ext_val)
*reg_val |= KVM_REG_RISCV_SBI_MULTI_MASK(ext_id);
}
return 0;
}
int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg)
{
unsigned long __user *uaddr =
(unsigned long __user *)(unsigned long)reg->addr;
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
KVM_REG_SIZE_MASK |
KVM_REG_RISCV_SBI_EXT);
unsigned long reg_val, reg_subtype;
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
return -EINVAL;
if (vcpu->arch.ran_atleast_once)
return -EBUSY;
reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
return -EFAULT;
switch (reg_subtype) {
case KVM_REG_RISCV_SBI_SINGLE:
return riscv_vcpu_set_sbi_ext_single(vcpu, reg_num, reg_val);
case KVM_REG_RISCV_SBI_MULTI_EN:
return riscv_vcpu_set_sbi_ext_multi(vcpu, reg_num, reg_val, true);
case KVM_REG_RISCV_SBI_MULTI_DIS:
return riscv_vcpu_set_sbi_ext_multi(vcpu, reg_num, reg_val, false);
default:
return -EINVAL;
}
return 0;
}
int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg)
{
int rc;
unsigned long __user *uaddr =
(unsigned long __user *)(unsigned long)reg->addr;
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
KVM_REG_SIZE_MASK |
KVM_REG_RISCV_SBI_EXT);
unsigned long reg_val, reg_subtype;
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
return -EINVAL;
reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
reg_val = 0;
switch (reg_subtype) {
case KVM_REG_RISCV_SBI_SINGLE:
rc = riscv_vcpu_get_sbi_ext_single(vcpu, reg_num, &reg_val);
break;
case KVM_REG_RISCV_SBI_MULTI_EN:
case KVM_REG_RISCV_SBI_MULTI_DIS:
rc = riscv_vcpu_get_sbi_ext_multi(vcpu, reg_num, &reg_val);
if (!rc && reg_subtype == KVM_REG_RISCV_SBI_MULTI_DIS)
reg_val = ~reg_val;
break;
default:
rc = -EINVAL;
}
if (rc)
return rc;
if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
return -EFAULT;
return 0;
}
const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(
struct kvm_vcpu *vcpu, unsigned long extid)
{ {
int i = 0; int i;
const struct kvm_riscv_sbi_extension_entry *sext;
struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
if (sbi_ext[i]->extid_start <= extid && sext = &sbi_ext[i];
sbi_ext[i]->extid_end >= extid) if (sext->ext_ptr->extid_start <= extid &&
return sbi_ext[i]; sext->ext_ptr->extid_end >= extid) {
if (sext->dis_idx < KVM_RISCV_SBI_EXT_MAX &&
scontext->extension_disabled[sext->dis_idx])
return NULL;
return sbi_ext[i].ext_ptr;
}
} }
return NULL; return NULL;
...@@ -126,7 +339,7 @@ int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run) ...@@ -126,7 +339,7 @@ int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
}; };
bool ext_is_v01 = false; bool ext_is_v01 = false;
sbi_ext = kvm_vcpu_sbi_find_ext(cp->a7); sbi_ext = kvm_vcpu_sbi_find_ext(vcpu, cp->a7);
if (sbi_ext && sbi_ext->handler) { if (sbi_ext && sbi_ext->handler) {
#ifdef CONFIG_RISCV_SBI_V01 #ifdef CONFIG_RISCV_SBI_V01
if (cp->a7 >= SBI_EXT_0_1_SET_TIMER && if (cp->a7 >= SBI_EXT_0_1_SET_TIMER &&
......
...@@ -44,7 +44,7 @@ static int kvm_sbi_ext_base_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, ...@@ -44,7 +44,7 @@ static int kvm_sbi_ext_base_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
kvm_riscv_vcpu_sbi_forward(vcpu, run); kvm_riscv_vcpu_sbi_forward(vcpu, run);
retdata->uexit = true; retdata->uexit = true;
} else { } else {
sbi_ext = kvm_vcpu_sbi_find_ext(cp->a0); sbi_ext = kvm_vcpu_sbi_find_ext(vcpu, cp->a0);
*out_val = sbi_ext && sbi_ext->probe ? *out_val = sbi_ext && sbi_ext->probe ?
sbi_ext->probe(vcpu) : !!sbi_ext; sbi_ext->probe(vcpu) : !!sbi_ext;
} }
......
...@@ -41,6 +41,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) ...@@ -41,6 +41,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
return r; return r;
} }
kvm_riscv_aia_init_vm(kvm);
kvm_riscv_guest_timer_init(kvm); kvm_riscv_guest_timer_init(kvm);
return 0; return 0;
...@@ -49,6 +51,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) ...@@ -49,6 +51,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
void kvm_arch_destroy_vm(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm)
{ {
kvm_destroy_vcpus(kvm); kvm_destroy_vcpus(kvm);
kvm_riscv_aia_destroy_vm(kvm);
} }
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
......
...@@ -26,9 +26,9 @@ void __init kvm_riscv_gstage_vmid_detect(void) ...@@ -26,9 +26,9 @@ void __init kvm_riscv_gstage_vmid_detect(void)
/* Figure-out number of VMID bits in HW */ /* Figure-out number of VMID bits in HW */
old = csr_read(CSR_HGATP); old = csr_read(CSR_HGATP);
csr_write(CSR_HGATP, old | HGATP_VMID_MASK); csr_write(CSR_HGATP, old | HGATP_VMID);
vmid_bits = csr_read(CSR_HGATP); vmid_bits = csr_read(CSR_HGATP);
vmid_bits = (vmid_bits & HGATP_VMID_MASK) >> HGATP_VMID_SHIFT; vmid_bits = (vmid_bits & HGATP_VMID) >> HGATP_VMID_SHIFT;
vmid_bits = fls_long(vmid_bits); vmid_bits = fls_long(vmid_bits);
csr_write(CSR_HGATP, old); csr_write(CSR_HGATP, old);
......
...@@ -192,21 +192,10 @@ static int expected_page_refs(struct page *page) ...@@ -192,21 +192,10 @@ static int expected_page_refs(struct page *page)
return res; return res;
} }
static int make_secure_pte(pte_t *ptep, unsigned long addr, static int make_page_secure(struct page *page, struct uv_cb_header *uvcb)
struct page *exp_page, struct uv_cb_header *uvcb)
{ {
pte_t entry = READ_ONCE(*ptep);
struct page *page;
int expected, cc = 0; int expected, cc = 0;
if (!pte_present(entry))
return -ENXIO;
if (pte_val(entry) & _PAGE_INVALID)
return -ENXIO;
page = pte_page(entry);
if (page != exp_page)
return -ENXIO;
if (PageWriteback(page)) if (PageWriteback(page))
return -EAGAIN; return -EAGAIN;
expected = expected_page_refs(page); expected = expected_page_refs(page);
...@@ -304,17 +293,18 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) ...@@ -304,17 +293,18 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
goto out; goto out;
rc = -ENXIO; rc = -ENXIO;
page = follow_page(vma, uaddr, FOLL_WRITE);
if (IS_ERR_OR_NULL(page))
goto out;
lock_page(page);
ptep = get_locked_pte(gmap->mm, uaddr, &ptelock); ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
if (should_export_before_import(uvcb, gmap->mm)) if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
uv_convert_from_secure(page_to_phys(page)); page = pte_page(*ptep);
rc = make_secure_pte(ptep, uaddr, page, uvcb); rc = -EAGAIN;
if (trylock_page(page)) {
if (should_export_before_import(uvcb, gmap->mm))
uv_convert_from_secure(page_to_phys(page));
rc = make_page_secure(page, uvcb);
unlock_page(page);
}
}
pte_unmap_unlock(ptep, ptelock); pte_unmap_unlock(ptep, ptelock);
unlock_page(page);
out: out:
mmap_read_unlock(gmap->mm); mmap_read_unlock(gmap->mm);
......
...@@ -314,6 +314,11 @@ int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc) ...@@ -314,6 +314,11 @@ int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
*/ */
if (kvm->arch.pv.set_aside) if (kvm->arch.pv.set_aside)
return -EINVAL; return -EINVAL;
/* Guest with segment type ASCE, refuse to destroy asynchronously */
if ((kvm->arch.gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
return -EINVAL;
priv = kzalloc(sizeof(*priv), GFP_KERNEL); priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv) if (!priv)
return -ENOMEM; return -ENOMEM;
......
...@@ -2822,6 +2822,9 @@ EXPORT_SYMBOL_GPL(s390_unlist_old_asce); ...@@ -2822,6 +2822,9 @@ EXPORT_SYMBOL_GPL(s390_unlist_old_asce);
* s390_replace_asce - Try to replace the current ASCE of a gmap with a copy * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
* @gmap: the gmap whose ASCE needs to be replaced * @gmap: the gmap whose ASCE needs to be replaced
* *
* If the ASCE is a SEGMENT type then this function will return -EINVAL,
* otherwise the pointers in the host_to_guest radix tree will keep pointing
* to the wrong pages, causing use-after-free and memory corruption.
* If the allocation of the new top level page table fails, the ASCE is not * If the allocation of the new top level page table fails, the ASCE is not
* replaced. * replaced.
* In any case, the old ASCE is always removed from the gmap CRST list. * In any case, the old ASCE is always removed from the gmap CRST list.
...@@ -2836,6 +2839,10 @@ int s390_replace_asce(struct gmap *gmap) ...@@ -2836,6 +2839,10 @@ int s390_replace_asce(struct gmap *gmap)
s390_unlist_old_asce(gmap); s390_unlist_old_asce(gmap);
/* Replacing segment type ASCEs would cause serious issues */
if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
return -EINVAL;
page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
if (!page) if (!page)
return -ENOMEM; return -ENOMEM;
......
...@@ -40,7 +40,17 @@ static __always_inline bool kvm_lockdep_assert_mmu_lock_held(struct kvm *kvm, ...@@ -40,7 +40,17 @@ static __always_inline bool kvm_lockdep_assert_mmu_lock_held(struct kvm *kvm,
void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
{ {
/* Also waits for any queued work items. */ /*
* Invalidate all roots, which besides the obvious, schedules all roots
* for zapping and thus puts the TDP MMU's reference to each root, i.e.
* ultimately frees all roots.
*/
kvm_tdp_mmu_invalidate_all_roots(kvm);
/*
* Destroying a workqueue also first flushes the workqueue, i.e. no
* need to invoke kvm_tdp_mmu_zap_invalidated_roots().
*/
destroy_workqueue(kvm->arch.tdp_mmu_zap_wq); destroy_workqueue(kvm->arch.tdp_mmu_zap_wq);
WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages)); WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages));
...@@ -116,16 +126,6 @@ static void tdp_mmu_schedule_zap_root(struct kvm *kvm, struct kvm_mmu_page *root ...@@ -116,16 +126,6 @@ static void tdp_mmu_schedule_zap_root(struct kvm *kvm, struct kvm_mmu_page *root
queue_work(kvm->arch.tdp_mmu_zap_wq, &root->tdp_mmu_async_work); queue_work(kvm->arch.tdp_mmu_zap_wq, &root->tdp_mmu_async_work);
} }
static inline bool kvm_tdp_root_mark_invalid(struct kvm_mmu_page *page)
{
union kvm_mmu_page_role role = page->role;
role.invalid = true;
/* No need to use cmpxchg, only the invalid bit can change. */
role.word = xchg(&page->role.word, role.word);
return role.invalid;
}
void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
bool shared) bool shared)
{ {
...@@ -134,45 +134,12 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, ...@@ -134,45 +134,12 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
if (!refcount_dec_and_test(&root->tdp_mmu_root_count)) if (!refcount_dec_and_test(&root->tdp_mmu_root_count))
return; return;
WARN_ON(!is_tdp_mmu_page(root));
/* /*
* The root now has refcount=0. It is valid, but readers already * The TDP MMU itself holds a reference to each root until the root is
* cannot acquire a reference to it because kvm_tdp_mmu_get_root() * explicitly invalidated, i.e. the final reference should be never be
* rejects it. This remains true for the rest of the execution * put for a valid root.
* of this function, because readers visit valid roots only
* (except for tdp_mmu_zap_root_work(), which however
* does not acquire any reference itself).
*
* Even though there are flows that need to visit all roots for
* correctness, they all take mmu_lock for write, so they cannot yet
* run concurrently. The same is true after kvm_tdp_root_mark_invalid,
* since the root still has refcount=0.
*
* However, tdp_mmu_zap_root can yield, and writers do not expect to
* see refcount=0 (see for example kvm_tdp_mmu_invalidate_all_roots()).
* So the root temporarily gets an extra reference, going to refcount=1
* while staying invalid. Readers still cannot acquire any reference;
* but writers are now allowed to run if tdp_mmu_zap_root yields and
* they might take an extra reference if they themselves yield.
* Therefore, when the reference is given back by the worker,
* there is no guarantee that the refcount is still 1. If not, whoever
* puts the last reference will free the page, but they will not have to
* zap the root because a root cannot go from invalid to valid.
*/ */
if (!kvm_tdp_root_mark_invalid(root)) { KVM_BUG_ON(!is_tdp_mmu_page(root) || !root->role.invalid, kvm);
refcount_set(&root->tdp_mmu_root_count, 1);
/*
* Zapping the root in a worker is not just "nice to have";
* it is required because kvm_tdp_mmu_invalidate_all_roots()
* skips already-invalid roots. If kvm_tdp_mmu_put_root() did
* not add the root to the workqueue, kvm_tdp_mmu_zap_all_fast()
* might return with some roots not zapped yet.
*/
tdp_mmu_schedule_zap_root(kvm, root);
return;
}
spin_lock(&kvm->arch.tdp_mmu_pages_lock); spin_lock(&kvm->arch.tdp_mmu_pages_lock);
list_del_rcu(&root->link); list_del_rcu(&root->link);
...@@ -320,7 +287,14 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu) ...@@ -320,7 +287,14 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
root = tdp_mmu_alloc_sp(vcpu); root = tdp_mmu_alloc_sp(vcpu);
tdp_mmu_init_sp(root, NULL, 0, role); tdp_mmu_init_sp(root, NULL, 0, role);
refcount_set(&root->tdp_mmu_root_count, 1); /*
* TDP MMU roots are kept until they are explicitly invalidated, either
* by a memslot update or by the destruction of the VM. Initialize the
* refcount to two; one reference for the vCPU, and one reference for
* the TDP MMU itself, which is held until the root is invalidated and
* is ultimately put by tdp_mmu_zap_root_work().
*/
refcount_set(&root->tdp_mmu_root_count, 2);
spin_lock(&kvm->arch.tdp_mmu_pages_lock); spin_lock(&kvm->arch.tdp_mmu_pages_lock);
list_add_rcu(&root->link, &kvm->arch.tdp_mmu_roots); list_add_rcu(&root->link, &kvm->arch.tdp_mmu_roots);
...@@ -946,32 +920,49 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) ...@@ -946,32 +920,49 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
/* /*
* Mark each TDP MMU root as invalid to prevent vCPUs from reusing a root that * Mark each TDP MMU root as invalid to prevent vCPUs from reusing a root that
* is about to be zapped, e.g. in response to a memslots update. The actual * is about to be zapped, e.g. in response to a memslots update. The actual
* zapping is performed asynchronously, so a reference is taken on all roots. * zapping is performed asynchronously. Using a separate workqueue makes it
* Using a separate workqueue makes it easy to ensure that the destruction is * easy to ensure that the destruction is performed before the "fast zap"
* performed before the "fast zap" completes, without keeping a separate list * completes, without keeping a separate list of invalidated roots; the list is
* of invalidated roots; the list is effectively the list of work items in * effectively the list of work items in the workqueue.
* the workqueue.
*
* Get a reference even if the root is already invalid, the asynchronous worker
* assumes it was gifted a reference to the root it processes. Because mmu_lock
* is held for write, it should be impossible to observe a root with zero refcount,
* i.e. the list of roots cannot be stale.
* *
* This has essentially the same effect for the TDP MMU * Note, the asynchronous worker is gifted the TDP MMU's reference.
* as updating mmu_valid_gen does for the shadow MMU. * See kvm_tdp_mmu_get_vcpu_root_hpa().
*/ */
void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm) void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm)
{ {
struct kvm_mmu_page *root; struct kvm_mmu_page *root;
lockdep_assert_held_write(&kvm->mmu_lock); /*
list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) { * mmu_lock must be held for write to ensure that a root doesn't become
if (!root->role.invalid && * invalid while there are active readers (invalidating a root while
!WARN_ON_ONCE(!kvm_tdp_mmu_get_root(root))) { * there are active readers may or may not be problematic in practice,
* but it's uncharted territory and not supported).
*
* Waive the assertion if there are no users of @kvm, i.e. the VM is
* being destroyed after all references have been put, or if no vCPUs
* have been created (which means there are no roots), i.e. the VM is
* being destroyed in an error path of KVM_CREATE_VM.
*/
if (IS_ENABLED(CONFIG_PROVE_LOCKING) &&
refcount_read(&kvm->users_count) && kvm->created_vcpus)
lockdep_assert_held_write(&kvm->mmu_lock);
/*
* As above, mmu_lock isn't held when destroying the VM! There can't
* be other references to @kvm, i.e. nothing else can invalidate roots
* or be consuming roots, but walking the list of roots does need to be
* guarded against roots being deleted by the asynchronous zap worker.
*/
rcu_read_lock();
list_for_each_entry_rcu(root, &kvm->arch.tdp_mmu_roots, link) {
if (!root->role.invalid) {
root->role.invalid = true; root->role.invalid = true;
tdp_mmu_schedule_zap_root(kvm, root); tdp_mmu_schedule_zap_root(kvm, root);
} }
} }
rcu_read_unlock();
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment