Commit 43d8ac22 authored by Marc Zyngier

Merge branch kvm-arm64/pkvm-hyp-sharing into kvmarm-master/next

* kvm-arm64/pkvm-hyp-sharing:
  : .
  : Series from Quentin Perret, implementing HYP page share/unshare:
  :
  : This series implements an unshare hypercall at EL2 in nVHE
  : protected mode, and makes use of it to unmap guest-specific
  : data-structures from EL2 stage-1 during guest tear-down.
  : Crucially, the implementation of the share and unshare
  : routines uses page refcounts in the host kernel to avoid
  : accidentally unmapping data-structures that overlap a common
  : page.
  : [...]
  : .
  KVM: arm64: pkvm: Unshare guest structs during teardown
  KVM: arm64: Expose unshare hypercall to the host
  KVM: arm64: Implement do_unshare() helper for unsharing memory
  KVM: arm64: Implement __pkvm_host_share_hyp() using do_share()
  KVM: arm64: Implement do_share() helper for sharing memory
  KVM: arm64: Introduce wrappers for host and hyp spin lock accessors
  KVM: arm64: Extend pkvm_page_state enumeration to handle absent pages
  KVM: arm64: pkvm: Refcount the pages shared with EL2
  KVM: arm64: Introduce kvm_share_hyp()
  KVM: arm64: Implement kvm_pgtable_hyp_unmap() at EL2
  KVM: arm64: Hook up ->page_count() for hypervisor stage-1 page-table
  KVM: arm64: Fixup hyp stage-1 refcount
  KVM: arm64: Refcount hyp stage-1 pgtable pages
  KVM: arm64: Provide {get,put}_page() stubs for early hyp allocator
Signed-off-by: Marc Zyngier <maz@kernel.org>
parents ce5b5b05 52b28657
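To make the refcounting described in the merge message concrete, here is a minimal, hypothetical usage sketch (not part of the series): a caller pairs kvm_share_hyp() with kvm_unshare_hyp() around the lifetime of a physically contiguous object, and the host-side pfn refcount ensures that a page backing several shared objects is only unmapped from EL2 once its last sharer goes away. The struct my_hyp_data type and the my_data_create()/my_data_destroy() helpers below are invented for illustration; only kvm_share_hyp() and kvm_unshare_hyp() come from the diff that follows.

#include <linux/slab.h>
#include <asm/kvm_mmu.h>    /* kvm_share_hyp(), kvm_unshare_hyp() */

/* Hypothetical, physically contiguous object the hypervisor needs to read. */
struct my_hyp_data {
    u64 cookie;
};

static struct my_hyp_data *my_data;

static int my_data_create(void)
{
    int ret;

    my_data = kzalloc(sizeof(*my_data), GFP_KERNEL);
    if (!my_data)
        return -ENOMEM;

    /*
     * Share the [from, to) kernel VA range with EL2. If another object on
     * the same page is already shared, this only bumps the pfn refcount.
     */
    ret = kvm_share_hyp(my_data, my_data + 1);
    if (ret) {
        kfree(my_data);
        my_data = NULL;
    }
    return ret;
}

static void my_data_destroy(void)
{
    /*
     * Unshare before freeing: the page is unmapped from EL2 stage-1 only
     * once the last sharer of the underlying pfn has dropped its reference.
     */
    kvm_unshare_hyp(my_data, my_data + 1);
    kfree(my_data);
    my_data = NULL;
}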
@@ -63,6 +63,7 @@ enum __kvm_host_smccc_func {
     /* Hypercalls available after pKVM finalisation */
     __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
+    __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
     __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
     __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
     __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
...
@@ -321,6 +321,7 @@ struct kvm_vcpu_arch {
     struct kvm_guest_debug_arch external_debug_state;
     struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */
+    struct task_struct *parent_task;
     struct {
         /* {Break,watch}point registers */
@@ -737,6 +738,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
+void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu);
 static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
 {
...
@@ -150,6 +150,8 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
 #include <asm/kvm_pgtable.h>
 #include <asm/stage2_pgtable.h>
 
+int kvm_share_hyp(void *from, void *to);
+void kvm_unshare_hyp(void *from, void *to);
 int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
 int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
                            void __iomem **kaddr,
...
@@ -251,6 +251,27 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);
 int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
                         enum kvm_pgtable_prot prot);
 
+/**
+ * kvm_pgtable_hyp_unmap() - Remove a mapping from a hypervisor stage-1 page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init().
+ * @addr: Virtual address from which to remove the mapping.
+ * @size: Size of the mapping.
+ *
+ * The offset of @addr within a page is ignored and @size is rounded up to
+ * the next page boundary.
+ *
+ * TLB invalidation is performed for each page-table entry cleared during the
+ * unmapping operation and the reference count for the page-table page
+ * containing the cleared entry is decremented, with unreferenced pages being
+ * freed. The unmapping operation will stop early if it encounters either an
+ * invalid page-table entry or a valid block mapping which maps beyond the range
+ * being unmapped.
+ *
+ * Return: Number of bytes unmapped, which may be 0.
+ */
+u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
+
 /**
  * kvm_get_vtcr() - Helper to construct VTCR_EL2
  * @mmfr0: Sanitized value of SYS_ID_AA64MMFR0_EL1 register.
...
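Since the kernel-doc above spells out the rounding and early-stop behaviour, a short, hypothetical EL2-side caller may help illustrate the return-value contract: it compares the number of bytes actually unmapped against the page-aligned size it requested. my_hyp_unmap_range() is invented for illustration and is not part of this series; only kvm_pgtable_hyp_unmap() comes from the diff above.

static int my_hyp_unmap_range(struct kvm_pgtable *pgt, u64 va, u64 size)
{
    u64 unmapped;

    /* The offset of va within a page is ignored and size is rounded up. */
    unmapped = kvm_pgtable_hyp_unmap(pgt, va, size);

    /*
     * The walk stops early on an invalid entry or on a block mapping that
     * extends beyond the requested range, so fewer bytes than asked for may
     * have been unmapped.
     */
    if (unmapped != PAGE_ALIGN(size))
        return -EINVAL;

    return 0;
}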
@@ -146,7 +146,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
     if (ret)
         return ret;
 
-    ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
+    ret = kvm_share_hyp(kvm, kvm + 1);
     if (ret)
         goto out_free_stage2_pgd;
@@ -188,6 +188,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
         }
     }
     atomic_set(&kvm->online_vcpus, 0);
+
+    kvm_unshare_hyp(kvm, kvm + 1);
 }
 
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -342,7 +344,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
     if (err)
         return err;
 
-    return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
+    return kvm_share_hyp(vcpu, vcpu + 1);
 }
 
 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
...
@@ -14,6 +14,19 @@
 #include <asm/kvm_mmu.h>
 #include <asm/sysreg.h>
 
+void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu)
+{
+    struct task_struct *p = vcpu->arch.parent_task;
+    struct user_fpsimd_state *fpsimd;
+
+    if (!is_protected_kvm_enabled() || !p)
+        return;
+
+    fpsimd = &p->thread.uw.fpsimd_state;
+    kvm_unshare_hyp(fpsimd, fpsimd + 1);
+    put_task_struct(p);
+}
+
 /*
  * Called on entry to KVM_RUN unless this vcpu previously ran at least
  * once and the most recent prior KVM_RUN for this vcpu was called from
@@ -29,12 +42,27 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
     struct user_fpsimd_state *fpsimd = &current->thread.uw.fpsimd_state;
 
+    kvm_vcpu_unshare_task_fp(vcpu);
+
     /* Make sure the host task fpsimd state is visible to hyp: */
-    ret = create_hyp_mappings(fpsimd, fpsimd + 1, PAGE_HYP);
-    if (!ret)
-        vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
+    ret = kvm_share_hyp(fpsimd, fpsimd + 1);
+    if (ret)
+        return ret;
 
-    return ret;
+    vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
+
+    /*
+     * We need to keep current's task_struct pinned until its data has been
+     * unshared with the hypervisor to make sure it is not re-used by the
+     * kernel and donated to someone else while already shared -- see
+     * kvm_vcpu_unshare_task_fp() for the matching put_task_struct().
+     */
+    if (is_protected_kvm_enabled()) {
+        get_task_struct(current);
+        vcpu->arch.parent_task = current;
+    }
+
+    return 0;
 }
 
 /*
...
@@ -24,6 +24,11 @@ enum pkvm_page_state {
     PKVM_PAGE_OWNED = 0ULL,
     PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0,
     PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1,
+    __PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 |
+                           KVM_PGTABLE_PROT_SW1,
+
+    /* Meta-states which aren't encoded directly in the PTE's SW bits */
+    PKVM_NOPAGE,
 };
 
 #define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
@@ -50,6 +55,7 @@ extern const u8 pkvm_hyp_id;
 int __pkvm_prot_finalize(void);
 int __pkvm_host_share_hyp(u64 pfn);
+int __pkvm_host_unshare_hyp(u64 pfn);
 bool addr_is_memory(phys_addr_t phys);
 int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
...
@@ -43,6 +43,9 @@ void *hyp_early_alloc_page(void *arg)
     return hyp_early_alloc_contig(1);
 }
 
+static void hyp_early_alloc_get_page(void *addr) { }
+static void hyp_early_alloc_put_page(void *addr) { }
+
 void hyp_early_alloc_init(void *virt, unsigned long size)
 {
     base = cur = (unsigned long)virt;
@@ -51,4 +54,6 @@ void hyp_early_alloc_init(void *virt, unsigned long size)
     hyp_early_alloc_mm_ops.zalloc_page = hyp_early_alloc_page;
     hyp_early_alloc_mm_ops.phys_to_virt = hyp_phys_to_virt;
     hyp_early_alloc_mm_ops.virt_to_phys = hyp_virt_to_phys;
+    hyp_early_alloc_mm_ops.get_page = hyp_early_alloc_get_page;
+    hyp_early_alloc_mm_ops.put_page = hyp_early_alloc_put_page;
 }
@@ -147,6 +147,13 @@ static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt)
     cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn);
 }
 
+static void handle___pkvm_host_unshare_hyp(struct kvm_cpu_context *host_ctxt)
+{
+    DECLARE_REG(u64, pfn, host_ctxt, 1);
+
+    cpu_reg(host_ctxt, 1) = __pkvm_host_unshare_hyp(pfn);
+}
+
 static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
 {
     DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
@@ -184,6 +191,7 @@ static const hcall_t host_hcall[] = {
     HANDLE_FUNC(__pkvm_prot_finalize),
     HANDLE_FUNC(__pkvm_host_share_hyp),
+    HANDLE_FUNC(__pkvm_host_unshare_hyp),
     HANDLE_FUNC(__kvm_adjust_pc),
     HANDLE_FUNC(__kvm_vcpu_run),
     HANDLE_FUNC(__kvm_flush_vm_context),
...
This diff is collapsed.
@@ -166,6 +166,7 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
                                          enum kvm_pgtable_walk_flags flag,
                                          void * const arg)
 {
+    struct kvm_pgtable_mm_ops *mm_ops = arg;
     enum kvm_pgtable_prot prot;
     enum pkvm_page_state state;
     kvm_pte_t pte = *ptep;
@@ -174,6 +175,15 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
     if (!kvm_pte_valid(pte))
         return 0;
 
+    /*
+     * Fix-up the refcount for the page-table pages as the early allocator
+     * was unable to access the hyp_vmemmap and so the buddy allocator has
+     * initialised the refcount to '1'.
+     */
+    mm_ops->get_page(ptep);
+    if (flag != KVM_PGTABLE_WALK_LEAF)
+        return 0;
+
     if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
         return -EINVAL;
@@ -206,7 +216,8 @@ static int finalize_host_mappings(void)
 {
     struct kvm_pgtable_walker walker = {
         .cb = finalize_host_mappings_walker,
-        .flags = KVM_PGTABLE_WALK_LEAF,
+        .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
+        .arg = pkvm_pgtable.mm_ops,
     };
     int i, ret;
@@ -241,19 +252,20 @@ void __noreturn __pkvm_init_finalise(void)
     if (ret)
         goto out;
 
-    ret = finalize_host_mappings();
-    if (ret)
-        goto out;
-
     pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) {
         .zalloc_page = hyp_zalloc_hyp_page,
         .phys_to_virt = hyp_phys_to_virt,
         .virt_to_phys = hyp_virt_to_phys,
         .get_page = hpool_get_page,
         .put_page = hpool_put_page,
+        .page_count = hyp_page_count,
     };
     pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
 
+    ret = finalize_host_mappings();
+    if (ret)
+        goto out;
+
 out:
     /*
      * We tail-called to here from handle___pkvm_init() and will not return,
...
@@ -383,21 +383,6 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
     return prot;
 }
 
-static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
-{
-    /*
-     * Tolerate KVM recreating the exact same mapping, or changing software
-     * bits if the existing mapping was valid.
-     */
-    if (old == new)
-        return false;
-
-    if (!kvm_pte_valid(old))
-        return true;
-
-    return !WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW);
-}
-
 static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
                                     kvm_pte_t *ptep, struct hyp_map_data *data)
 {
@@ -407,11 +392,16 @@ static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
     if (!kvm_block_mapping_supported(addr, end, phys, level))
         return false;
 
+    data->phys += granule;
     new = kvm_init_valid_leaf_pte(phys, data->attr, level);
-    if (hyp_pte_needs_update(old, new))
-        smp_store_release(ptep, new);
+    if (old == new)
+        return true;
+    if (!kvm_pte_valid(old))
+        data->mm_ops->get_page(ptep);
+    else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
+        return false;
 
-    data->phys += granule;
+    smp_store_release(ptep, new);
     return true;
 }
 
@@ -433,6 +423,7 @@ static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
         return -ENOMEM;
 
     kvm_set_table_pte(ptep, childp, mm_ops);
+    mm_ops->get_page(ptep);
     return 0;
 }
 
@@ -460,6 +451,69 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
     return ret;
 }
 
+struct hyp_unmap_data {
+    u64 unmapped;
+    struct kvm_pgtable_mm_ops *mm_ops;
+};
+
+static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+                            enum kvm_pgtable_walk_flags flag, void * const arg)
+{
+    kvm_pte_t pte = *ptep, *childp = NULL;
+    u64 granule = kvm_granule_size(level);
+    struct hyp_unmap_data *data = arg;
+    struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
+
+    if (!kvm_pte_valid(pte))
+        return -EINVAL;
+
+    if (kvm_pte_table(pte, level)) {
+        childp = kvm_pte_follow(pte, mm_ops);
+
+        if (mm_ops->page_count(childp) != 1)
+            return 0;
+
+        kvm_clear_pte(ptep);
+        dsb(ishst);
+        __tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level);
+    } else {
+        if (end - addr < granule)
+            return -EINVAL;
+
+        kvm_clear_pte(ptep);
+        dsb(ishst);
+        __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
+        data->unmapped += granule;
+    }
+
+    dsb(ish);
+    isb();
+    mm_ops->put_page(ptep);
+
+    if (childp)
+        mm_ops->put_page(childp);
+
+    return 0;
+}
+
+u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+    struct hyp_unmap_data unmap_data = {
+        .mm_ops = pgt->mm_ops,
+    };
+    struct kvm_pgtable_walker walker = {
+        .cb = hyp_unmap_walker,
+        .arg = &unmap_data,
+        .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
+    };
+
+    if (!pgt->mm_ops->page_count)
+        return 0;
+
+    kvm_pgtable_walk(pgt, addr, size, &walker);
+
+    return unmap_data.unmapped;
+}
+
 int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
                          struct kvm_pgtable_mm_ops *mm_ops)
 {
@@ -482,8 +536,16 @@ static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
                            enum kvm_pgtable_walk_flags flag, void * const arg)
 {
     struct kvm_pgtable_mm_ops *mm_ops = arg;
+    kvm_pte_t pte = *ptep;
+
+    if (!kvm_pte_valid(pte))
+        return 0;
+
+    mm_ops->put_page(ptep);
+
+    if (kvm_pte_table(pte, level))
+        mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
 
-    mm_ops->put_page((void *)kvm_pte_follow(*ptep, mm_ops));
     return 0;
 }
 
@@ -491,7 +553,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
 {
     struct kvm_pgtable_walker walker = {
         .cb = hyp_free_walker,
-        .flags = KVM_PGTABLE_WALK_TABLE_POST,
+        .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
         .arg = pgt->mm_ops,
     };
...
@@ -284,14 +284,117 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
     }
 }
 
-static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
-{
-    phys_addr_t addr;
-    int ret;
-
-    for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) {
-        ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
-                                __phys_to_pfn(addr));
+struct hyp_shared_pfn {
+    u64 pfn;
+    int count;
+    struct rb_node node;
+};
+
+static DEFINE_MUTEX(hyp_shared_pfns_lock);
+static struct rb_root hyp_shared_pfns = RB_ROOT;
+
+static struct hyp_shared_pfn *find_shared_pfn(u64 pfn, struct rb_node ***node,
+                                              struct rb_node **parent)
+{
+    struct hyp_shared_pfn *this;
+
+    *node = &hyp_shared_pfns.rb_node;
+    *parent = NULL;
+    while (**node) {
+        this = container_of(**node, struct hyp_shared_pfn, node);
+        *parent = **node;
+        if (this->pfn < pfn)
+            *node = &((**node)->rb_left);
+        else if (this->pfn > pfn)
+            *node = &((**node)->rb_right);
+        else
+            return this;
+    }
+
+    return NULL;
+}
+
+static int share_pfn_hyp(u64 pfn)
+{
+    struct rb_node **node, *parent;
+    struct hyp_shared_pfn *this;
+    int ret = 0;
+
+    mutex_lock(&hyp_shared_pfns_lock);
+    this = find_shared_pfn(pfn, &node, &parent);
+    if (this) {
+        this->count++;
+        goto unlock;
+    }
+
+    this = kzalloc(sizeof(*this), GFP_KERNEL);
+    if (!this) {
+        ret = -ENOMEM;
+        goto unlock;
+    }
+
+    this->pfn = pfn;
+    this->count = 1;
+    rb_link_node(&this->node, parent, node);
+    rb_insert_color(&this->node, &hyp_shared_pfns);
+    ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn, 1);
+unlock:
+    mutex_unlock(&hyp_shared_pfns_lock);
+
+    return ret;
+}
+
+static int unshare_pfn_hyp(u64 pfn)
+{
+    struct rb_node **node, *parent;
+    struct hyp_shared_pfn *this;
+    int ret = 0;
+
+    mutex_lock(&hyp_shared_pfns_lock);
+    this = find_shared_pfn(pfn, &node, &parent);
+    if (WARN_ON(!this)) {
+        ret = -ENOENT;
+        goto unlock;
+    }
+
+    this->count--;
+    if (this->count)
+        goto unlock;
+
+    rb_erase(&this->node, &hyp_shared_pfns);
+    kfree(this);
+    ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn, 1);
+unlock:
+    mutex_unlock(&hyp_shared_pfns_lock);
+
+    return ret;
+}
+
+int kvm_share_hyp(void *from, void *to)
+{
+    phys_addr_t start, end, cur;
+    u64 pfn;
+    int ret;
+
+    if (is_kernel_in_hyp_mode())
+        return 0;
+
+    /*
+     * The share hcall maps things in the 'fixed-offset' region of the hyp
+     * VA space, so we can only share physically contiguous data-structures
+     * for now.
+     */
+    if (is_vmalloc_or_module_addr(from) || is_vmalloc_or_module_addr(to))
+        return -EINVAL;
+
+    if (kvm_host_owns_hyp_mappings())
+        return create_hyp_mappings(from, to, PAGE_HYP);
+
+    start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
+    end = PAGE_ALIGN(__pa(to));
+    for (cur = start; cur < end; cur += PAGE_SIZE) {
+        pfn = __phys_to_pfn(cur);
+        ret = share_pfn_hyp(pfn);
         if (ret)
             return ret;
     }
@@ -299,6 +402,22 @@ static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
     return 0;
 }
 
+void kvm_unshare_hyp(void *from, void *to)
+{
+    phys_addr_t start, end, cur;
+    u64 pfn;
+
+    if (is_kernel_in_hyp_mode() || kvm_host_owns_hyp_mappings() || !from)
+        return;
+
+    start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
+    end = PAGE_ALIGN(__pa(to));
+    for (cur = start; cur < end; cur += PAGE_SIZE) {
+        pfn = __phys_to_pfn(cur);
+        WARN_ON(unshare_pfn_hyp(pfn));
+    }
+}
+
 /**
  * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
  * @from: The virtual kernel start address of the range
@@ -319,12 +438,8 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
     if (is_kernel_in_hyp_mode())
         return 0;
 
-    if (!kvm_host_owns_hyp_mappings()) {
-        if (WARN_ON(prot != PAGE_HYP))
-            return -EPERM;
-        return pkvm_share_hyp(kvm_kaddr_to_phys(from),
-                              kvm_kaddr_to_phys(to));
-    }
+    if (!kvm_host_owns_hyp_mappings())
+        return -EPERM;
 
     start = start & PAGE_MASK;
     end = PAGE_ALIGN(end);
...
@@ -113,7 +113,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
     if (!buf)
         return -ENOMEM;
 
-    ret = create_hyp_mappings(buf, buf + reg_sz, PAGE_HYP);
+    ret = kvm_share_hyp(buf, buf + reg_sz);
     if (ret) {
         kfree(buf);
         return ret;
@@ -150,7 +150,13 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
 
 void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
-    kfree(vcpu->arch.sve_state);
+    void *sve_state = vcpu->arch.sve_state;
+
+    kvm_vcpu_unshare_task_fp(vcpu);
+    kvm_unshare_hyp(vcpu, vcpu + 1);
+    if (sve_state)
+        kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
+    kfree(sve_state);
 }
 
 static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
...