Commit fe49fd94 authored by Marc Zyngier, committed by Oliver Upton

KVM: arm64: Move VTCR_EL2 into struct s2_mmu

We currently have a global VTCR_EL2 value for each guest, even
if the guest uses NV. This implies that the guest's own S2 must
fit in the host's. This is odd, for multiple reasons:

- the PARange values and the number of IPA bits don't necessarily
  match: you can have 33 bits of IPA space, and yet you can only
  describe a PARange of 32 or 36 bits (see the sketch after this
  list)

- When userspace sets the IPA space, it creates a contract with the
  kernel saying "this is the IPA space I'm prepared to handle".
  At no point does it constrain the guest's own IPA space, as
  long as the guest doesn't try to use a [I]PA outside of the
  IPA space set by userspace

- We don't even try to hide the value of ID_AA64MMFR0_EL1.PARange.
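As a hedged illustration of the first point (values taken from the
ARM ARM PARange encoding; the array below is for exposition only,
not kernel code), the representable output sizes are coarse, so a
33-bit IPA space falls between two encodable values:

	/* Exposition only: ID_AA64MMFR0_EL1.PARange encodings and the
	 * physical address sizes they describe. There is no encoding
	 * for 33 bits: the nearest representable values are 32 and 36. */
	static const unsigned int parange_to_pa_bits[] = {
		32, 36, 40, 42, 44, 48, 52,
	};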

And then there is the consequence of the above: if a guest tries
to create an S2 whose input addresses go beyond the IPA space
defined by the host, we inject a fatal exception.

This is no good. For all intents and purposes, a guest should be
able to have the S2 it really wants, as long as the *output*
addresses of that S2 stay inside the IPA space, as sketched below.
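A minimal sketch of the intended invariant (illustrative only;
s2_output_ok() is a hypothetical helper, not part of this patch):
the guest's S2 inputs are unconstrained, and only its outputs must
fall within the userspace-configured IPA space.

	/* Hypothetical helper for illustration: an S2 output (a PA from
	 * the guest's point of view, an IPA from the host's) is fine iff
	 * it lies inside the IPA space userspace configured for the VM. */
	static bool s2_output_ok(u64 out_addr, unsigned int ipa_bits)
	{
		return out_addr < (1ULL << ipa_bits);
	}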

For that, we need a per-s2_mmu VTCR_EL2 setting, which allows us
to represent the full PARange. Move the vtcr field into the s2_mmu
structure, which has no impact whatsoever, except for NV.

Note that once we are able to override ID_AA64MMFR0_EL1.PARange
from userspace, we'll also be able to restrict the size of the
shadow S2 that NV uses.
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20231012205108.3937270-1-maz@kernel.org
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
parent 934bf871
@@ -158,6 +158,16 @@ struct kvm_s2_mmu {
 	phys_addr_t	pgd_phys;
 	struct kvm_pgtable *pgt;
 
+	/*
+	 * VTCR value used on the host. For a non-NV guest (or a NV
+	 * guest that runs in a context where its own S2 doesn't
+	 * apply), its T0SZ value reflects that of the IPA size.
+	 *
+	 * For a shadow S2 MMU, T0SZ reflects the PARange exposed to
+	 * the guest.
+	 */
+	u64	vtcr;
+
 	/* The last vcpu id that ran on each physical CPU */
 	int __percpu *last_vcpu_ran;
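The comment above ties T0SZ to the IPA size; architecturally, T0SZ
encodes the input-address width as 64 minus the number of address
bits. A hedged one-liner (ipa_bits_to_t0sz() is illustrative, not a
kernel helper):

	/* Illustration: a 40-bit IPA space yields T0SZ = 24; a shadow S2
	 * sized to the guest-visible PARange uses that width instead. */
	static inline unsigned int ipa_bits_to_t0sz(unsigned int ipa_bits)
	{
		return 64 - ipa_bits;
	}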
@@ -205,9 +215,6 @@ struct kvm_protected_vm {
 struct kvm_arch {
 	struct kvm_s2_mmu mmu;
 
-	/* VTCR_EL2 value for this VM */
-	u64	vtcr;
-
 	/* Interrupt controller */
 	struct vgic_dist	vgic;
...
@@ -150,9 +150,9 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
  */
 #define KVM_PHYS_SHIFT	(40)
 
-#define kvm_phys_shift(kvm)		VTCR_EL2_IPA(kvm->arch.vtcr)
-#define kvm_phys_size(kvm)		(_AC(1, ULL) << kvm_phys_shift(kvm))
-#define kvm_phys_mask(kvm)		(kvm_phys_size(kvm) - _AC(1, ULL))
+#define kvm_phys_shift(mmu)		VTCR_EL2_IPA((mmu)->vtcr)
+#define kvm_phys_size(mmu)		(_AC(1, ULL) << kvm_phys_shift(mmu))
+#define kvm_phys_mask(mmu)		(kvm_phys_size(mmu) - _AC(1, ULL))
 
 #include <asm/kvm_pgtable.h>
 #include <asm/stage2_pgtable.h>
@@ -299,7 +299,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
 static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu,
 					  struct kvm_arch *arch)
 {
-	write_sysreg(arch->vtcr, vtcr_el2);
+	write_sysreg(mmu->vtcr, vtcr_el2);
 	write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
 
 	/*
...
@@ -21,13 +21,13 @@
  * (IPA_SHIFT - 4).
  */
 #define stage2_pgtable_levels(ipa)	ARM64_HW_PGTABLE_LEVELS((ipa) - 4)
-#define kvm_stage2_levels(kvm)		VTCR_EL2_LVLS(kvm->arch.vtcr)
+#define kvm_stage2_levels(mmu)		VTCR_EL2_LVLS((mmu)->vtcr)
 
 /*
  * kvm_mmu_cache_min_pages() is the number of pages required to install
  * a stage-2 translation. We pre-allocate the entry level page table at
  * the VM creation.
  */
-#define kvm_mmu_cache_min_pages(kvm)	(kvm_stage2_levels(kvm) - 1)
+#define kvm_mmu_cache_min_pages(mmu)	(kvm_stage2_levels(mmu) - 1)
 
 #endif	/* __ARM64_S2_PGTABLE_H_ */
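To make the level arithmetic concrete, here is a worked example
(hedged: it assumes 4KiB pages, i.e. PAGE_SHIFT == 12, and the stock
ARM64_HW_PGTABLE_LEVELS() definition):

	/*
	 * stage2_pgtable_levels(40) == ARM64_HW_PGTABLE_LEVELS(36): the
	 * "- 4" accounts for up to 16 concatenated tables at the entry
	 * level. With 4KiB pages this resolves to 3 translation levels,
	 * so kvm_mmu_cache_min_pages() == 3 - 1 == 2: the entry-level
	 * table is pre-allocated at VM creation, leaving at most two
	 * page-table pages to top up before installing a mapping.
	 */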
@@ -129,8 +129,8 @@ static void prepare_host_vtcr(void)
 	parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
 	phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);
 
-	host_mmu.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
-					  id_aa64mmfr1_el1_sys_val, phys_shift);
+	host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
+					      id_aa64mmfr1_el1_sys_val, phys_shift);
 }
 
 static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);
@@ -235,7 +235,7 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
 	unsigned long nr_pages;
 	int ret;
 
-	nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT;
+	nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT;
 	ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
 	if (ret)
 		return ret;
@@ -295,7 +295,7 @@ int __pkvm_prot_finalize(void)
 		return -EPERM;
 
 	params->vttbr = kvm_get_vttbr(mmu);
-	params->vtcr = host_mmu.arch.vtcr;
+	params->vtcr = mmu->vtcr;
 	params->hcr_el2 |= HCR_VM;
 
 	/*
...
@@ -303,7 +303,7 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
 {
 	hyp_vm->host_kvm = host_kvm;
 	hyp_vm->kvm.created_vcpus = nr_vcpus;
-	hyp_vm->kvm.arch.vtcr = host_mmu.arch.vtcr;
+	hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
 }
 
 static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
@@ -483,7 +483,7 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
 	}
 
 	vm_size = pkvm_get_hyp_vm_size(nr_vcpus);
-	pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.vtcr);
+	pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.mmu.vtcr);
 
 	ret = -ENOMEM;
...
@@ -1511,7 +1511,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
 			      kvm_pgtable_force_pte_cb_t force_pte_cb)
 {
 	size_t pgd_sz;
-	u64 vtcr = mmu->arch->vtcr;
+	u64 vtcr = mmu->vtcr;
 	u32 ia_bits = VTCR_EL2_IPA(vtcr);
 	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
 	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
...
@@ -892,7 +892,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
 	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
 	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
-	kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
+	mmu->vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
 
 	if (mmu->pgt != NULL) {
 		kvm_err("kvm_arch already initialized?\n");
@@ -1067,7 +1067,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 	phys_addr_t addr;
 	int ret = 0;
 	struct kvm_mmu_memory_cache cache = { .gfp_zero = __GFP_ZERO };
-	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
+	struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
+	struct kvm_pgtable *pgt = mmu->pgt;
 	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE |
 				     KVM_PGTABLE_PROT_R |
 				     (writable ? KVM_PGTABLE_PROT_W : 0);
@@ -1080,7 +1081,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 	for (addr = guest_ipa; addr < guest_ipa + size; addr += PAGE_SIZE) {
 		ret = kvm_mmu_topup_memory_cache(&cache,
-						 kvm_mmu_cache_min_pages(kvm));
+						 kvm_mmu_cache_min_pages(mmu));
 		if (ret)
 			break;
@@ -1431,7 +1432,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (fault_status != ESR_ELx_FSC_PERM ||
 	    (logging_active && write_fault)) {
 		ret = kvm_mmu_topup_memory_cache(memcache,
-						 kvm_mmu_cache_min_pages(kvm));
+						 kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu));
 		if (ret)
 			return ret;
 	}
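Note the switch from the VM-wide kvm to vcpu->arch.hw_mmu here: with
NV, the stage-2 a vCPU actually runs on may be a shadow S2 rather
than the VM's canonical MMU, so cache sizing must follow the active
MMU. A hedged sketch of the distinction (vcpu_active_s2() is
illustrative, not kernel code):

	/* Illustration: without NV, vcpu->arch.hw_mmu simply points at
	 * &vcpu->kvm->arch.mmu; with NV it may designate a shadow S2 MMU
	 * whose vtcr (and thus level count) differs from the canonical one. */
	static inline struct kvm_s2_mmu *vcpu_active_s2(struct kvm_vcpu *vcpu)
	{
		return vcpu->arch.hw_mmu;
	}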
@@ -1747,7 +1748,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 	}
 
 	/* Userspace should not be able to register out-of-bounds IPAs */
-	VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));
+	VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->arch.hw_mmu));
 
 	if (fault_status == ESR_ELx_FSC_ACCESS) {
 		handle_access_fault(vcpu, fault_ipa);
@@ -2021,7 +2022,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	 * Prevent userspace from creating a memory region outside of the IPA
 	 * space addressable by the KVM guest IPA space.
 	 */
-	if ((new->base_gfn + new->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
+	if ((new->base_gfn + new->npages) > (kvm_phys_size(&kvm->arch.mmu) >> PAGE_SHIFT))
 		return -EFAULT;
 
 	hva = new->userspace_addr;
...
@@ -123,7 +123,7 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
 	if (host_kvm->created_vcpus < 1)
 		return -EINVAL;
 
-	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr);
+	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);
 
 	/*
 	 * The PGD pages will be reclaimed using a hyp_memcache which implies
...
@@ -27,7 +27,8 @@ int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr,
 	if (addr + size < addr)
 		return -EINVAL;
 
-	if (addr & ~kvm_phys_mask(kvm) || addr + size > kvm_phys_size(kvm))
+	if (addr & ~kvm_phys_mask(&kvm->arch.mmu) ||
+	    (addr + size) > kvm_phys_size(&kvm->arch.mmu))
 		return -E2BIG;
 
 	return 0;
...