Commit 982a847c authored by Will Deacon's avatar Will Deacon

Merge branch 'for-next/poe' into for-next/core

* for-next/poe: (31 commits)
  arm64: pkeys: remove redundant WARN
  kselftest/arm64: Add test case for POR_EL0 signal frame records
  kselftest/arm64: parse POE_MAGIC in a signal frame
  kselftest/arm64: add HWCAP test for FEAT_S1POE
  selftests: mm: make protection_keys test work on arm64
  selftests: mm: move fpregs printing
  kselftest/arm64: move get_header()
  arm64: add Permission Overlay Extension Kconfig
  arm64: enable PKEY support for CPUs with S1POE
  arm64: enable POE and PIE to coexist
  arm64/ptrace: add support for FEAT_POE
  arm64: add POE signal support
  arm64: implement PKEYS support
  arm64: add pte_access_permitted_no_overlay()
  arm64: handle PKEY/POE faults
  arm64: mask out POIndex when modifying a PTE
  arm64: convert protection key into vm_flags and pgprot values
  arm64: add POIndex defines
  arm64: re-order MTE VM_ flags
  arm64: enable the Permission Overlay Extension for EL0
  ...
parents 3175e051 10166c23
......@@ -365,6 +365,8 @@ HWCAP2_SME_SF8DP2
HWCAP2_SME_SF8DP4
Functionality implied by ID_AA64SMFR0_EL1.SF8DP4 == 0b1.
HWCAP2_POE
Functionality implied by ID_AA64MMFR3_EL1.S1POE == 0b0001.
4. Unused AT_HWCAP bits
-----------------------
......
......@@ -2138,6 +2138,29 @@ config ARM64_EPAN
if the cpu does not implement the feature.
endmenu # "ARMv8.7 architectural features"
menu "ARMv8.9 architectural features"
config ARM64_POE
prompt "Permission Overlay Extension"
def_bool y
select ARCH_USES_HIGH_VMA_FLAGS
select ARCH_HAS_PKEYS
help
The Permission Overlay Extension is used to implement Memory
Protection Keys. Memory Protection Keys provides a mechanism for
enforcing page-based protections, but without requiring modification
of the page tables when an application changes protection domains.
For details, see Documentation/core-api/protection-keys.rst
If unsure, say y.
config ARCH_PKEY_BITS
int
default 3
endmenu # "ARMv8.9 architectural features"
config ARM64_SVE
bool "ARM Scalable Vector Extension support"
default y
......
......@@ -832,6 +832,12 @@ static inline bool system_supports_lpa2(void)
return cpus_have_final_cap(ARM64_HAS_LPA2);
}
static inline bool system_supports_poe(void)
{
return IS_ENABLED(CONFIG_ARM64_POE) &&
alternative_has_cap_unlikely(ARM64_HAS_S1POE);
}
int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
bool try_emulate_mrs(struct pt_regs *regs, u32 isn);
......
......@@ -192,6 +192,14 @@
orr x0, x0, #HFGxTR_EL2_nPIRE0_EL1
.Lskip_pie_fgt_\@:
mrs_s x1, SYS_ID_AA64MMFR3_EL1
ubfx x1, x1, #ID_AA64MMFR3_EL1_S1POE_SHIFT, #4
cbz x1, .Lskip_poe_fgt_\@
/* Disable trapping of POR_EL0 */
orr x0, x0, #HFGxTR_EL2_nPOR_EL0
.Lskip_poe_fgt_\@:
msr_s SYS_HFGRTR_EL2, x0
msr_s SYS_HFGWTR_EL2, x0
msr_s SYS_HFGITR_EL2, xzr
......
......@@ -157,6 +157,7 @@
#define KERNEL_HWCAP_SME_SF8FMA __khwcap2_feature(SME_SF8FMA)
#define KERNEL_HWCAP_SME_SF8DP4 __khwcap2_feature(SME_SF8DP4)
#define KERNEL_HWCAP_SME_SF8DP2 __khwcap2_feature(SME_SF8DP2)
#define KERNEL_HWCAP_POE __khwcap2_feature(POE)
/*
* This yields a mask that user programs can use to figure out what
......
......@@ -10,6 +10,7 @@
#include <asm/hyp_image.h>
#include <asm/insn.h>
#include <asm/virt.h>
#include <asm/sysreg.h>
#define ARM_EXIT_WITH_SERROR_BIT 31
#define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT))
......@@ -259,7 +260,7 @@ extern u64 __kvm_get_mdcr_el2(void);
asm volatile( \
" mrs %1, spsr_el2\n" \
" mrs %2, elr_el2\n" \
"1: at "at_op", %3\n" \
"1: " __msr_s(at_op, "%3") "\n" \
" isb\n" \
" b 9f\n" \
"2: msr spsr_el2, %1\n" \
......
......@@ -446,6 +446,8 @@ enum vcpu_sysreg {
GCR_EL1, /* Tag Control Register */
TFSRE0_EL1, /* Tag Fault Status Register (EL0) */
POR_EL0, /* Permission Overlay Register 0 (EL0) */
/* 32bit specific registers. */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
......@@ -517,6 +519,8 @@ enum vcpu_sysreg {
VNCR(PIR_EL1), /* Permission Indirection Register 1 (EL1) */
VNCR(PIRE0_EL1), /* Permission Indirection Register 0 (EL1) */
VNCR(POR_EL1), /* Permission Overlay Register 1 (EL1) */
VNCR(HFGRTR_EL2),
VNCR(HFGWTR_EL2),
VNCR(HFGITR_EL2),
......
......@@ -7,7 +7,7 @@
#include <uapi/asm/mman.h>
static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
unsigned long pkey __always_unused)
unsigned long pkey)
{
unsigned long ret = 0;
......@@ -17,6 +17,14 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
if (system_supports_mte() && (prot & PROT_MTE))
ret |= VM_MTE;
#ifdef CONFIG_ARCH_HAS_PKEYS
if (system_supports_poe()) {
ret |= pkey & BIT(0) ? VM_PKEY_BIT0 : 0;
ret |= pkey & BIT(1) ? VM_PKEY_BIT1 : 0;
ret |= pkey & BIT(2) ? VM_PKEY_BIT2 : 0;
}
#endif
return ret;
}
#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
......
......@@ -25,6 +25,7 @@ typedef struct {
refcount_t pinned;
void *vdso;
unsigned long flags;
u8 pkey_allocation_map;
} mm_context_t;
/*
......
......@@ -15,12 +15,12 @@
#include <linux/sched/hotplug.h>
#include <linux/mm_types.h>
#include <linux/pgtable.h>
#include <linux/pkeys.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/daifflags.h>
#include <asm/proc-fns.h>
#include <asm-generic/mm_hooks.h>
#include <asm/cputype.h>
#include <asm/sysreg.h>
#include <asm/tlbflush.h>
......@@ -175,9 +175,36 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
atomic64_set(&mm->context.id, 0);
refcount_set(&mm->context.pinned, 0);
/* pkey 0 is the default, so always reserve it. */
mm->context.pkey_allocation_map = BIT(0);
return 0;
}
static inline void arch_dup_pkeys(struct mm_struct *oldmm,
struct mm_struct *mm)
{
/* Duplicate the oldmm pkey state in mm: */
mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map;
}
/*
 * Duplicate arch-specific mm state from oldmm into mm (presumably invoked
 * when an address space is cloned -- confirm against callers). The only
 * state copied here is the protection-key allocation map.
 *
 * Always returns 0.
 */
static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
arch_dup_pkeys(oldmm, mm);
return 0;
}
/* Arch hook with nothing to do on arm64: intentionally empty. */
static inline void arch_exit_mmap(struct mm_struct *mm)
{
}
/*
 * Arch hook for the range [start, end) of mm (presumably called on unmap --
 * confirm against callers). arm64 has nothing to do: intentionally empty.
 */
static inline void arch_unmap(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
}
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
static inline void update_saved_ttbr0(struct task_struct *tsk,
struct mm_struct *mm)
......@@ -267,6 +294,23 @@ static inline unsigned long mm_untag_mask(struct mm_struct *mm)
return -1UL >> 8;
}
/*
 * Decide whether the protection key attached to @vma permits the requested
 * access. Keys are only enforced for the current process: POR_EL0 is a
 * per-thread register, so there is no user context from which to evaluate
 * another address space.
 *
 * Returns true when POE is unsupported, when the access is foreign, or when
 * the current POR_EL0 grants the access for the VMA's pkey.
 */
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
		bool write, bool execute, bool foreign)
{
	/* No overlays to consult, or the VMA does not belong to this process. */
	if (!system_supports_poe() || foreign || vma_is_foreign(vma))
		return true;

	return por_el0_allows_pkey(vma_pkey(vma), write, execute);
}
#include <asm-generic/mmu_context.h>
#endif /* !__ASSEMBLY__ */
......
......@@ -198,6 +198,16 @@
#define PTE_PI_IDX_2 53 /* PXN */
#define PTE_PI_IDX_3 54 /* UXN */
/*
* POIndex[2:0] encoding (Permission Overlay Extension)
*/
#define PTE_PO_IDX_0 (_AT(pteval_t, 1) << 60)
#define PTE_PO_IDX_1 (_AT(pteval_t, 1) << 61)
#define PTE_PO_IDX_2 (_AT(pteval_t, 1) << 62)
#define PTE_PO_IDX_MASK GENMASK_ULL(62, 60)
/*
* Memory Attribute override for Stage-2 (MemAttr[3:0])
*/
......
......@@ -154,10 +154,10 @@ static inline bool __pure lpa2_is_enabled(void)
#define PIE_E0 ( \
PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \
PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX) | \
PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX) | \
PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R) | \
PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW))
PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX_O) | \
PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX_O) | \
PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R_O) | \
PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW_O))
#define PIE_E1 ( \
PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \
......
......@@ -34,6 +34,7 @@
#include <asm/cmpxchg.h>
#include <asm/fixmap.h>
#include <asm/por.h>
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
......@@ -149,6 +150,24 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
#define pte_accessible(mm, pte) \
(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
/*
 * Check the live POR_EL0 value for the permission required by an access
 * to memory tagged with @pkey. A write is checked first, then an execute;
 * anything else is treated as a read. Always true when POE is unsupported.
 */
static inline bool por_el0_allows_pkey(u8 pkey, bool write, bool execute)
{
	u64 overlay;

	if (!system_supports_poe())
		return true;

	overlay = read_sysreg_s(SYS_POR_EL0);

	if (write)
		return por_elx_allows_write(overlay, pkey);

	return execute ? por_elx_allows_exec(overlay, pkey)
		       : por_elx_allows_read(overlay, pkey);
}
/*
* p??_access_permitted() is true for valid user mappings (PTE_USER
* bit set, subject to the write permission check). For execute-only
......@@ -156,8 +175,11 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
* not set) must return false. PROT_NONE mappings do not have the
* PTE_VALID bit set.
*/
#define pte_access_permitted(pte, write) \
#define pte_access_permitted_no_overlay(pte, write) \
(((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte)))
#define pte_access_permitted(pte, write) \
(pte_access_permitted_no_overlay(pte, write) && \
por_el0_allows_pkey(FIELD_GET(PTE_PO_IDX_MASK, pte_val(pte)), write, false))
#define pmd_access_permitted(pmd, write) \
(pte_access_permitted(pmd_pte(pmd), (write)))
#define pud_access_permitted(pud, write) \
......@@ -373,10 +395,11 @@ static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages)
/*
* If the PTE would provide user space access to the tags associated
* with it then ensure that the MTE tags are synchronised. Although
* pte_access_permitted() returns false for exec only mappings, they
* don't expose tags (instruction fetches don't check tags).
* pte_access_permitted_no_overlay() returns false for exec only
* mappings, they don't expose tags (instruction fetches don't check
* tags).
*/
if (system_supports_mte() && pte_access_permitted(pte, false) &&
if (system_supports_mte() && pte_access_permitted_no_overlay(pte, false) &&
!pte_special(pte) && pte_tagged(pte))
mte_sync_tags(pte, nr_pages);
}
......@@ -1103,7 +1126,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
*/
const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
PTE_PRESENT_INVALID | PTE_VALID | PTE_WRITE |
PTE_GP | PTE_ATTRINDX_MASK;
PTE_GP | PTE_ATTRINDX_MASK | PTE_PO_IDX_MASK;
/* preserve the hardware dirty information */
if (pte_hw_dirty(pte))
pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2023 Arm Ltd.
*
* Based on arch/x86/include/asm/pkeys.h
*/
#ifndef _ASM_ARM64_PKEYS_H
#define _ASM_ARM64_PKEYS_H
#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2)
#define arch_max_pkey() 8
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
unsigned long init_val);
/* Protection keys are available exactly when POE is supported. */
static inline bool arch_pkeys_enabled(void)
{
return system_supports_poe();
}
/* Extract the protection key number encoded in the VMA's pkey flag bits. */
static inline int vma_pkey(struct vm_area_struct *vma)
{
	unsigned long pkey_bits = vma->vm_flags & ARCH_VM_PKEY_FLAGS;

	return pkey_bits >> VM_PKEY_SHIFT;
}
/*
 * Pick the pkey to apply to @vma: an explicit @pkey wins; -1 means
 * "unspecified", in which case the VMA keeps its current key.
 */
static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
		int prot, int pkey)
{
	return (pkey == -1) ? vma_pkey(vma) : pkey;
}
/*
 * arm64 never hands out a dedicated execute-only pkey: this always
 * returns -1 regardless of @mm.
 */
static inline int execute_only_pkey(struct mm_struct *mm)
{
// Execute-only mappings are handled by EPAN/FEAT_PAN3.
return -1;
}
/* Per-mm bitmap of allocated protection keys: bit N set means pkey N is in use. */
#define mm_pkey_allocation_map(mm)	(mm)->context.pkey_allocation_map

/*
 * Mark @pkey as allocated in @mm's bitmap. The argument is parenthesised
 * so that expressions with lower precedence than '<<' (e.g. 'a | b')
 * are shifted as a whole.
 */
#define mm_set_pkey_allocated(mm, pkey) do {		\
	mm_pkey_allocation_map(mm) |= (1U << (pkey));	\
} while (0)

/* Mark @pkey as free in @mm's bitmap. */
#define mm_set_pkey_free(mm, pkey) do {			\
	mm_pkey_allocation_map(mm) &= ~(1U << (pkey));	\
} while (0)
/*
 * Tell whether @pkey is currently allocated in @mm. A key counts as
 * allocated if pkey_alloc() returned it, or if it is pkey 0, which is
 * reserved implicitly when the mm is created. Out-of-range keys are
 * never allocated.
 */
static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
{
	const bool in_range = pkey >= 0 && pkey < arch_max_pkey();

	/* The shift is only evaluated for in-range keys. */
	return in_range && (mm_pkey_allocation_map(mm) & (1U << pkey));
}
/*
 * Allocate the lowest-numbered free protection key in @mm.
 *
 * Returns a positive, 3-bit key on success, or -1 when pkeys are not
 * enabled or every key is already taken.
 *
 * This is the single point where a pkey is validated against what the
 * hardware supports; the rest of the kernel trusts keys handed out here.
 */
static inline int mm_pkey_alloc(struct mm_struct *mm)
{
	const u8 full_map = GENMASK(arch_max_pkey() - 1, 0);
	int pkey;

	/*
	 * The exhaustion check must come before ffz(): its result is
	 * undefined when the map contains no zero bit.
	 */
	if (!arch_pkeys_enabled() || mm_pkey_allocation_map(mm) == full_map)
		return -1;

	pkey = ffz(mm_pkey_allocation_map(mm));
	mm_set_pkey_allocated(mm, pkey);

	return pkey;
}
/*
 * Release @pkey back to @mm's pool.
 * Returns 0 on success, or -EINVAL if the key was not allocated.
 */
static inline int mm_pkey_free(struct mm_struct *mm, int pkey)
{
	if (mm_pkey_is_allocated(mm, pkey)) {
		mm_set_pkey_free(mm, pkey);
		return 0;
	}

	return -EINVAL;
}
#endif /* _ASM_ARM64_PKEYS_H */
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2023 Arm Ltd.
*/
#ifndef _ASM_ARM64_POR_H
#define _ASM_ARM64_POR_H
/* Each protection key owns a 4-bit permission field in a POR_ELx value. */
#define POR_BITS_PER_PKEY 4
/* Extract the 4-bit permission field for key index 'idx' from 'por_elx'. */
#define POR_ELx_IDX(por_elx, idx) (((por_elx) >> ((idx) * POR_BITS_PER_PKEY)) & 0xf)
/* True if the overlay value @por grants read permission for @pkey. */
static inline bool por_elx_allows_read(u64 por, u8 pkey)
{
	return (POR_ELx_IDX(por, pkey) & POE_R) != 0;
}
/* True if the overlay value @por grants write permission for @pkey. */
static inline bool por_elx_allows_write(u64 por, u8 pkey)
{
	return (POR_ELx_IDX(por, pkey) & POE_W) != 0;
}
/* True if the overlay value @por grants execute permission for @pkey. */
static inline bool por_elx_allows_exec(u64 por, u8 pkey)
{
	return (POR_ELx_IDX(por, pkey) & POE_X) != 0;
}
#endif /* _ASM_ARM64_POR_H */
......@@ -184,6 +184,7 @@ struct thread_struct {
u64 sctlr_user;
u64 svcr;
u64 tpidr2_el0;
u64 por_el0;
};
static inline unsigned int thread_get_vl(struct thread_struct *thread,
......
......@@ -1076,6 +1076,9 @@
#define POE_RXW UL(0x7)
#define POE_MASK UL(0xf)
/* Initial value for Permission Overlay Extension for EL0 */
#define POR_EL0_INIT POE_RXW
#define ARM64_FEATURE_FIELD_BITS 4
/* Defined for compatibility only, do not add new users. */
......
......@@ -25,6 +25,7 @@ try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn)
void force_signal_inject(int signal, int code, unsigned long address, unsigned long err);
void arm64_notify_segfault(unsigned long addr);
void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str);
void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey);
void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str);
......
......@@ -52,6 +52,7 @@
#define VNCR_PIRE0_EL1 0x290
#define VNCR_PIRE0_EL2 0x298
#define VNCR_PIR_EL1 0x2A0
#define VNCR_POR_EL1 0x2A8
#define VNCR_ICH_LR0_EL2 0x400
#define VNCR_ICH_LR1_EL2 0x408
#define VNCR_ICH_LR2_EL2 0x410
......
......@@ -122,5 +122,6 @@
#define HWCAP2_SME_SF8FMA (1UL << 60)
#define HWCAP2_SME_SF8DP4 (1UL << 61)
#define HWCAP2_SME_SF8DP2 (1UL << 62)
#define HWCAP2_POE (1UL << 63)
#endif /* _UAPI__ASM_HWCAP_H */
......@@ -7,4 +7,13 @@
#define PROT_BTI 0x10 /* BTI guarded page */
#define PROT_MTE 0x20 /* Normal Tagged mapping */
/* Override any generic PKEY permission defines */
#define PKEY_DISABLE_EXECUTE 0x4
#define PKEY_DISABLE_READ 0x8
#undef PKEY_ACCESS_MASK
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
PKEY_DISABLE_WRITE |\
PKEY_DISABLE_READ |\
PKEY_DISABLE_EXECUTE)
#endif /* ! _UAPI__ASM_MMAN_H */
......@@ -98,6 +98,13 @@ struct esr_context {
__u64 esr;
};
/* Magic value identifying a poe_context record in a signal frame. */
#define POE_MAGIC 0x504f4530
/*
 * Signal frame record carrying the POR_EL0 value.
 */
struct poe_context {
/* Record header; magic is POE_MAGIC and size is sizeof(struct poe_context). */
struct _aarch64_ctx head;
/* Value of POR_EL0 captured when the record was written. */
__u64 por_el0;
};
/*
* extra_context: describes extra space in the signal frame for
* additional structures that don't fit in sigcontext.__reserved[].
......
......@@ -466,6 +466,8 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_POE),
FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1POE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0),
ARM64_FTR_END,
......@@ -2348,6 +2350,14 @@ static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused)
sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn);
}
#ifdef CONFIG_ARM64_POE
/*
 * cpu_enable callback for the S1POE capability: sets the E0POE bits in
 * TCR2_EL1 and CPACR_EL1 (nothing is cleared in either register).
 * The capability argument is unused.
 */
static void cpu_enable_poe(const struct arm64_cpu_capabilities *__unused)
{
sysreg_clear_set(REG_TCR2_EL1, 0, TCR2_EL1x_E0POE);
sysreg_clear_set(CPACR_EL1, 0, CPACR_ELx_E0POE);
}
#endif
/* Internal helper functions to match cpu capability type */
static bool
cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
......@@ -2870,6 +2880,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_nv1,
ARM64_CPUID_FIELDS_NEG(ID_AA64MMFR4_EL1, E2H0, NI_NV1)
},
#ifdef CONFIG_ARM64_POE
{
.desc = "Stage-1 Permission Overlay Extension (S1POE)",
.capability = ARM64_HAS_S1POE,
.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
.matches = has_cpuid_feature,
.cpu_enable = cpu_enable_poe,
ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, S1POE, IMP)
},
#endif
{},
};
......@@ -3034,6 +3054,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP2),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8E4M3, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E4M3),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8E5M2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E5M2),
#ifdef CONFIG_ARM64_POE
HWCAP_CAP(ID_AA64MMFR3_EL1, S1POE, IMP, CAP_HWCAP, KERNEL_HWCAP_POE),
#endif
{},
};
......
......@@ -143,6 +143,7 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_SME_SF8FMA] = "smesf8fma",
[KERNEL_HWCAP_SME_SF8DP4] = "smesf8dp4",
[KERNEL_HWCAP_SME_SF8DP2] = "smesf8dp2",
[KERNEL_HWCAP_POE] = "poe",
};
#ifdef CONFIG_COMPAT
......
......@@ -271,12 +271,21 @@ static void flush_tagged_addr_state(void)
clear_thread_flag(TIF_TAGGED_ADDR);
}
/* Reset POR_EL0 to its initial value; a no-op when POE is unsupported. */
static void flush_poe(void)
{
	if (system_supports_poe())
		write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
}
/*
 * Run each per-thread flush helper in turn for the current task:
 * FP/SIMD, TLS, ptrace hardware breakpoints, tagged-address state and
 * finally the permission overlay register.
 */
void flush_thread(void)
{
fpsimd_flush_thread();
tls_thread_flush();
flush_ptrace_hw_breakpoint(current);
flush_tagged_addr_state();
/* Resets POR_EL0 to POR_EL0_INIT when POE is supported. */
flush_poe();
}
void arch_release_task_struct(struct task_struct *tsk)
......@@ -371,6 +380,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
if (system_supports_tpidr2())
p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
if (system_supports_poe())
p->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
if (stack_start) {
if (is_compat_thread(task_thread_info(p)))
childregs->compat_sp = stack_start;
......@@ -495,6 +507,17 @@ static void erratum_1418040_new_exec(void)
preempt_enable();
}
static void permission_overlay_switch(struct task_struct *next)
{
if (!system_supports_poe())
return;
current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
if (current->thread.por_el0 != next->thread.por_el0) {
write_sysreg_s(next->thread.por_el0, SYS_POR_EL0);
}
}
/*
* __switch_to() checks current->thread.sctlr_user as an optimisation. Therefore
* this function must be called with preemption disabled and the update to
......@@ -530,6 +553,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
ssbs_thread_switch(next);
erratum_1418040_thread_switch(next);
ptrauth_thread_switch_user(next);
permission_overlay_switch(next);
/*
* Complete any pending TLB or cache maintenance on this CPU in case
......
......@@ -1440,6 +1440,39 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct
}
#endif
#ifdef CONFIG_ARM64_POE
static int poe_get(struct task_struct *target,
const struct user_regset *regset,
struct membuf to)
{
if (!system_supports_poe())
return -EINVAL;
return membuf_write(&to, &target->thread.por_el0,
sizeof(target->thread.por_el0));
}
/*
 * Regset write handler for NT_ARM_POE: update the target's saved POR_EL0
 * from the supplied kernel or user buffer.
 *
 * Returns 0 on success, -EINVAL when POE is unsupported, or the error
 * from user_regset_copyin().
 */
static int poe_set(struct task_struct *target, const struct
		   user_regset *regset, unsigned int pos,
		   unsigned int count, const void *kbuf, const
		   void __user *ubuf)
{
	int ret;
	long ctrl;

	if (!system_supports_poe())
		return -EINVAL;

	/*
	 * Start from the current value: user_regset_copyin() only fills in
	 * the bytes the caller actually supplied, so a zero-length or short
	 * write must leave the remainder untouched rather than storing
	 * uninitialised kernel stack contents into POR_EL0.
	 */
	ctrl = target->thread.por_el0;

	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1);
	if (ret)
		return ret;

	target->thread.por_el0 = ctrl;

	return 0;
}
#endif
enum aarch64_regset {
REGSET_GPR,
REGSET_FPR,
......@@ -1469,6 +1502,9 @@ enum aarch64_regset {
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
REGSET_TAGGED_ADDR_CTRL,
#endif
#ifdef CONFIG_ARM64_POE
REGSET_POE
#endif
};
static const struct user_regset aarch64_regsets[] = {
......@@ -1628,6 +1664,16 @@ static const struct user_regset aarch64_regsets[] = {
.set = tagged_addr_ctrl_set,
},
#endif
#ifdef CONFIG_ARM64_POE
[REGSET_POE] = {
.core_note_type = NT_ARM_POE,
.n = 1,
.size = sizeof(long),
.align = sizeof(long),
.regset_get = poe_get,
.set = poe_set,
},
#endif
};
static const struct user_regset_view user_aarch64_view = {
......
......@@ -61,6 +61,7 @@ struct rt_sigframe_user_layout {
unsigned long za_offset;
unsigned long zt_offset;
unsigned long fpmr_offset;
unsigned long poe_offset;
unsigned long extra_offset;
unsigned long end_offset;
};
......@@ -185,6 +186,8 @@ struct user_ctxs {
u32 zt_size;
struct fpmr_context __user *fpmr;
u32 fpmr_size;
struct poe_context __user *poe;
u32 poe_size;
};
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
......@@ -258,6 +261,32 @@ static int restore_fpmr_context(struct user_ctxs *user)
return err;
}
/*
 * Fill in the poe_context record at @ctx in the user signal frame:
 * the POE_MAGIC/size header followed by the live POR_EL0 value.
 * Returns the error accumulated by the user-space stores (0 if none).
 */
static int preserve_poe_context(struct poe_context __user *ctx)
{
int err = 0;
__put_user_error(POE_MAGIC, &ctx->head.magic, err);
__put_user_error(sizeof(*ctx), &ctx->head.size, err);
__put_user_error(read_sysreg_s(SYS_POR_EL0), &ctx->por_el0, err);
return err;
}
/*
 * Load POR_EL0 from the poe_context record found in the user signal frame.
 *
 * Returns -EINVAL if the record has an unexpected size, the error from the
 * user-space read if that fails (POR_EL0 is then left untouched), or 0.
 */
static int restore_poe_context(struct user_ctxs *user)
{
	u64 saved_por;
	int err = 0;

	if (user->poe_size != sizeof(*user->poe))
		return -EINVAL;

	__get_user_error(saved_por, &(user->poe->por_el0), err);
	if (err)
		return err;

	write_sysreg_s(saved_por, SYS_POR_EL0);

	return 0;
}
#ifdef CONFIG_ARM64_SVE
static int preserve_sve_context(struct sve_context __user *ctx)
......@@ -621,6 +650,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->za = NULL;
user->zt = NULL;
user->fpmr = NULL;
user->poe = NULL;
if (!IS_ALIGNED((unsigned long)base, 16))
goto invalid;
......@@ -671,6 +701,17 @@ static int parse_user_sigframe(struct user_ctxs *user,
/* ignore */
break;
case POE_MAGIC:
if (!system_supports_poe())
goto invalid;
if (user->poe)
goto invalid;
user->poe = (struct poe_context __user *)head;
user->poe_size = size;
break;
case SVE_MAGIC:
if (!system_supports_sve() && !system_supports_sme())
goto invalid;
......@@ -857,6 +898,9 @@ static int restore_sigframe(struct pt_regs *regs,
if (err == 0 && system_supports_sme2() && user.zt)
err = restore_zt_context(&user);
if (err == 0 && system_supports_poe() && user.poe)
err = restore_poe_context(&user);
return err;
}
......@@ -980,6 +1024,13 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
return err;
}
if (system_supports_poe()) {
err = sigframe_alloc(user, &user->poe_offset,
sizeof(struct poe_context));
if (err)
return err;
}
return sigframe_alloc_end(user);
}
......@@ -1042,6 +1093,14 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
err |= preserve_fpmr_context(fpmr_ctx);
}
if (system_supports_poe() && err == 0 && user->poe_offset) {
struct poe_context __user *poe_ctx =
apply_user_offset(user, user->poe_offset);
err |= preserve_poe_context(poe_ctx);
}
/* ZA state if present */
if (system_supports_sme() && err == 0 && user->za_offset) {
struct za_context __user *za_ctx =
......@@ -1178,6 +1237,9 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
sme_smstop();
}
if (system_supports_poe())
write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
if (ka->sa.sa_flags & SA_RESTORER)
sigtramp = ka->sa.sa_restorer;
else
......
......@@ -273,6 +273,12 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far,
force_sig_fault(signo, code, (void __user *)far);
}
/*
 * Report a protection-key fault to the current task: log the SIGSEGV via
 * arm64_show_signal() with description @str, then raise the pkey error
 * signal for fault address @far and key @pkey.
 */
void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey)
{
arm64_show_signal(SIGSEGV, str);
force_sig_pkuerr((void __user *)far, pkey);
}
void arm64_force_sig_mceerr(int code, unsigned long far, short lsb,
const char *str)
{
......
......@@ -14,6 +14,7 @@
static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
int ret;
u64 par, tmp;
/*
......@@ -27,7 +28,9 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
* saved the guest context yet, and we may return early...
*/
par = read_sysreg_par();
if (!__kvm_at("s1e1r", far))
ret = system_supports_poe() ? __kvm_at(OP_AT_S1E1A, far) :
__kvm_at(OP_AT_S1E1R, far);
if (!ret)
tmp = read_sysreg_par();
else
tmp = SYS_PAR_EL1_F; /* back to the guest */
......
......@@ -16,9 +16,15 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt);
/*
 * Save the system registers handled as "common" state into @ctxt:
 * MDSCR_EL1 always, and POR_EL0 when the context has S1POE.
 */
static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1);
// POR_EL0 can affect uaccess, so must be saved/restored early.
if (ctxt_has_s1poe(ctxt))
ctxt_sys_reg(ctxt, POR_EL0) = read_sysreg_s(SYS_POR_EL0);
}
static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
......@@ -66,6 +72,17 @@ static inline bool ctxt_has_tcrx(struct kvm_cpu_context *ctxt)
return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, TCRX, IMP);
}
/*
 * True when the host supports POE and the VM owning @ctxt advertises
 * ID_AA64MMFR3_EL1.S1POE as implemented.
 */
static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt)
{
	struct kvm_vcpu *vcpu;
	bool has_s1poe = false;

	if (system_supports_poe()) {
		vcpu = ctxt_to_vcpu(ctxt);
		has_s1poe = kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, S1POE, IMP);
	}

	return has_s1poe;
}
static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR);
......@@ -80,6 +97,9 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, PIR_EL1) = read_sysreg_el1(SYS_PIR);
ctxt_sys_reg(ctxt, PIRE0_EL1) = read_sysreg_el1(SYS_PIRE0);
}
if (ctxt_has_s1poe(ctxt))
ctxt_sys_reg(ctxt, POR_EL1) = read_sysreg_el1(SYS_POR);
}
ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR);
ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0);
......@@ -120,6 +140,10 @@ static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
/*
 * Restore the system registers handled as "common" state from @ctxt:
 * MDSCR_EL1 always, and POR_EL0 when the context has S1POE.
 */
static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1);
// POR_EL0 can affect uaccess, so must be saved/restored early.
if (ctxt_has_s1poe(ctxt))
write_sysreg_s(ctxt_sys_reg(ctxt, POR_EL0), SYS_POR_EL0);
}
static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
......@@ -158,6 +182,9 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg_el1(ctxt_sys_reg(ctxt, PIR_EL1), SYS_PIR);
write_sysreg_el1(ctxt_sys_reg(ctxt, PIRE0_EL1), SYS_PIRE0);
}
if (ctxt_has_s1poe(ctxt))
write_sysreg_el1(ctxt_sys_reg(ctxt, POR_EL1), SYS_POR);
}
write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR);
write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0);
......
......@@ -1557,6 +1557,9 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
case SYS_ID_AA64MMFR2_EL1:
val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK;
break;
case SYS_ID_AA64MMFR3_EL1:
val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE;
break;
case SYS_ID_MMFR4_EL1:
val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX);
break;
......@@ -2256,6 +2259,15 @@ static bool access_zcr_el2(struct kvm_vcpu *vcpu,
return true;
}
/*
 * Visibility callback for the POE system registers: visible (0) when the
 * VM has S1POE, REG_HIDDEN otherwise.
 */
static unsigned int s1poe_visibility(const struct kvm_vcpu *vcpu,
				     const struct sys_reg_desc *rd)
{
	return kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S1POE, IMP) ? 0 : REG_HIDDEN;
}
/*
* Architected system registers.
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
......@@ -2419,7 +2431,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64MMFR2_EL1_IDS |
ID_AA64MMFR2_EL1_NV |
ID_AA64MMFR2_EL1_CCIDX)),
ID_SANITISED(ID_AA64MMFR3_EL1),
ID_WRITABLE(ID_AA64MMFR3_EL1, (ID_AA64MMFR3_EL1_TCRX |
ID_AA64MMFR3_EL1_S1POE)),
ID_SANITISED(ID_AA64MMFR4_EL1),
ID_UNALLOCATED(7,5),
ID_UNALLOCATED(7,6),
......@@ -2493,6 +2506,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
{ SYS_DESC(SYS_PIRE0_EL1), NULL, reset_unknown, PIRE0_EL1 },
{ SYS_DESC(SYS_PIR_EL1), NULL, reset_unknown, PIR_EL1 },
{ SYS_DESC(SYS_POR_EL1), NULL, reset_unknown, POR_EL1,
.visibility = s1poe_visibility },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
{ SYS_DESC(SYS_LORSA_EL1), trap_loregion },
......@@ -2579,6 +2594,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
.access = access_pmovs, .reg = PMOVSSET_EL0,
.get_user = get_pmreg, .set_user = set_pmreg },
{ SYS_DESC(SYS_POR_EL0), NULL, reset_unknown, POR_EL0,
.visibility = s1poe_visibility },
{ SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 },
{ SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 },
{ SYS_DESC(SYS_TPIDR2_EL0), undef_access },
......@@ -4569,8 +4586,6 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
kvm->arch.fgu[HFGxTR_GROUP] = (HFGxTR_EL2_nAMAIR2_EL1 |
HFGxTR_EL2_nMAIR2_EL1 |
HFGxTR_EL2_nS2POR_EL1 |
HFGxTR_EL2_nPOR_EL1 |
HFGxTR_EL2_nPOR_EL0 |
HFGxTR_EL2_nACCDATA_EL1 |
HFGxTR_EL2_nSMPRI_EL1_MASK |
HFGxTR_EL2_nTPIDR2_EL0_MASK);
......@@ -4605,6 +4620,10 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPIRE0_EL1 |
HFGxTR_EL2_nPIR_EL1);
if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP))
kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPOR_EL1 |
HFGxTR_EL2_nPOR_EL0);
if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP))
kvm->arch.fgu[HAFGRTR_GROUP] |= ~(HAFGRTR_EL2_RES0 |
HAFGRTR_EL2_RES1);
......
......@@ -23,6 +23,7 @@
#include <linux/sched/debug.h>
#include <linux/highmem.h>
#include <linux/perf_event.h>
#include <linux/pkeys.h>
#include <linux/preempt.h>
#include <linux/hugetlb.h>
......@@ -486,6 +487,23 @@ static void do_bad_area(unsigned long far, unsigned long esr,
}
}
/*
 * Decide whether a fault should be attributed to a protection key.
 *
 * That is the case when POE is supported and either the syndrome reports
 * a permission fault with the Overlay bit set in ISS2, or the VMA's pkey
 * does not permit the access described by @mm_flags.
 */
static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma,
			    unsigned int mm_flags)
{
	bool overlay_fault;

	if (!system_supports_poe())
		return false;

	overlay_fault = esr_fsc_is_permission_fault(esr) &&
			(ESR_ELx_ISS2(esr) & ESR_ELx_Overlay);

	/* The VMA check is only consulted when the syndrome is inconclusive. */
	return overlay_fault ||
	       !arch_vma_access_permitted(vma,
					  mm_flags & FAULT_FLAG_WRITE,
					  mm_flags & FAULT_FLAG_INSTRUCTION,
					  false);
}
static bool is_el0_instruction_abort(unsigned long esr)
{
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
......@@ -511,6 +529,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
unsigned long addr = untagged_addr(far);
struct vm_area_struct *vma;
int si_code;
int pkey = -1;
if (kprobe_page_fault(regs, esr))
return 0;
......@@ -575,6 +594,16 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
goto bad_area;
}
if (fault_from_pkey(esr, vma, mm_flags)) {
pkey = vma_pkey(vma);
vma_end_read(vma);
fault = 0;
si_code = SEGV_PKUERR;
count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
goto bad_area;
}
fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
vma_end_read(vma);
......@@ -610,7 +639,16 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
goto bad_area;
}
if (fault_from_pkey(esr, vma, mm_flags)) {
pkey = vma_pkey(vma);
mmap_read_unlock(mm);
fault = 0;
si_code = SEGV_PKUERR;
goto bad_area;
}
fault = handle_mm_fault(vma, addr, mm_flags, regs);
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
if (!user_mode(regs))
......@@ -669,7 +707,22 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
} else {
/*
* The pkey value that we return to userspace can be different
* from the pkey that caused the fault.
*
* 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4);
* 2. T1 : set POR_EL0 to deny access to pkey=4, touches page
* 3. T1 : faults...
* 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5);
* 5. T1 : enters fault handler, takes mmap_lock, etc...
* 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
* faulted on a pte with its pkey=4.
*/
/* Something tried to access memory that is outside the memory map */
if (si_code == SEGV_PKUERR)
arm64_force_sig_fault_pkey(far, inf->name, pkey);
else
arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
}
......
......@@ -102,6 +102,17 @@ pgprot_t vm_get_page_prot(unsigned long vm_flags)
if (vm_flags & VM_MTE)
prot |= PTE_ATTRINDX(MT_NORMAL_TAGGED);
#ifdef CONFIG_ARCH_HAS_PKEYS
if (system_supports_poe()) {
if (vm_flags & VM_PKEY_BIT0)
prot |= PTE_PO_IDX_0;
if (vm_flags & VM_PKEY_BIT1)
prot |= PTE_PO_IDX_1;
if (vm_flags & VM_PKEY_BIT2)
prot |= PTE_PO_IDX_2;
}
#endif
return __pgprot(prot);
}
EXPORT_SYMBOL(vm_get_page_prot);
......@@ -25,6 +25,7 @@
#include <linux/vmalloc.h>
#include <linux/set_memory.h>
#include <linux/kfence.h>
#include <linux/pkeys.h>
#include <asm/barrier.h>
#include <asm/cputype.h>
......@@ -1549,3 +1550,47 @@ void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp)
cpu_uninstall_idmap();
}
#ifdef CONFIG_ARCH_HAS_PKEYS
/*
 * arch_set_user_pkey_access() - set the POR_EL0 permissions of one pkey.
 * @tsk:      not referenced here; the POR_EL0 system register is accessed
 *            directly
 * @pkey:     protection key index, must be in [0, arch_max_pkey())
 * @init_val: mask of PKEY_DISABLE_* flags to apply to @pkey
 *
 * Starts from full POE_RXW access, clears the permissions selected by
 * @init_val and replaces @pkey's field in POR_EL0, leaving the fields of
 * all other keys as they were.
 *
 * Return: 0 on success, -ENOSPC if the system does not support POE,
 * -EINVAL if @pkey is out of range.
 */
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val)
{
	u64 new_por = POE_RXW;
	u64 old_por;
	u64 pkey_shift;

	if (!system_supports_poe())
		return -ENOSPC;

	/*
	 * This code should only be called with valid 'pkey'
	 * values originating from in-kernel users. Complain
	 * if a bad value is observed. Negative values are rejected
	 * as well: they would otherwise become an out-of-range
	 * shift count below.
	 */
	if (WARN_ON_ONCE(pkey < 0 || pkey >= arch_max_pkey()))
		return -EINVAL;

	/* Clear the permissions that init_val asks us to disable: */
	if (init_val & PKEY_DISABLE_WRITE)
		new_por &= ~POE_W;
	if (init_val & PKEY_DISABLE_ACCESS)
		new_por &= ~POE_RW;
	if (init_val & PKEY_DISABLE_READ)
		new_por &= ~POE_R;
	if (init_val & PKEY_DISABLE_EXECUTE)
		new_por &= ~POE_X;

	/* Shift the bits in to the correct place in POR for pkey: */
	pkey_shift = pkey * POR_BITS_PER_PKEY;
	new_por <<= pkey_shift;

	/* Get old POR and mask off any old bits in place: */
	old_por = read_sysreg_s(SYS_POR_EL0);
	old_por &= ~(POE_MASK << pkey_shift);

	/* Write old part along with new part: */
	write_sysreg_s(old_por | new_por, SYS_POR_EL0);

	return 0;
}
#endif
......@@ -45,6 +45,7 @@ HAS_MOPS
HAS_NESTED_VIRT
HAS_PAN
HAS_S1PIE
HAS_S1POE
HAS_RAS_EXTN
HAS_RNG
HAS_SB
......
......@@ -1026,6 +1026,10 @@ config PPC_MEM_KEYS
If unsure, say y.
config ARCH_PKEY_BITS
int
default 5
config PPC_SECURE_BOOT
prompt "Enable secure boot support"
bool
......
......@@ -1889,6 +1889,10 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS
If unsure, say y.
config ARCH_PKEY_BITS
int
default 4
choice
prompt "TSX enable mode"
depends on CPU_SUP_INTEL
......
......@@ -976,7 +976,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_PKEY_BIT0)] = "",
[ilog2(VM_PKEY_BIT1)] = "",
[ilog2(VM_PKEY_BIT2)] = "",
#if VM_PKEY_BIT3
[ilog2(VM_PKEY_BIT3)] = "",
#endif
#if VM_PKEY_BIT4
[ilog2(VM_PKEY_BIT4)] = "",
#endif
......
......@@ -331,11 +331,15 @@ extern unsigned int kobjsize(const void *objp);
#ifdef CONFIG_ARCH_HAS_PKEYS
# define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0
# define VM_PKEY_BIT0 VM_HIGH_ARCH_0 /* A protection key is a 4-bit value */
# define VM_PKEY_BIT1 VM_HIGH_ARCH_1 /* on x86 and 5-bit value on ppc64 */
# define VM_PKEY_BIT0 VM_HIGH_ARCH_0
# define VM_PKEY_BIT1 VM_HIGH_ARCH_1
# define VM_PKEY_BIT2 VM_HIGH_ARCH_2
#if CONFIG_ARCH_PKEY_BITS > 3
# define VM_PKEY_BIT3 VM_HIGH_ARCH_3
#ifdef CONFIG_PPC
#else
# define VM_PKEY_BIT3 0
#endif
#if CONFIG_ARCH_PKEY_BITS > 4
# define VM_PKEY_BIT4 VM_HIGH_ARCH_4
#else
# define VM_PKEY_BIT4 0
......@@ -374,8 +378,8 @@ extern unsigned int kobjsize(const void *objp);
#endif
#if defined(CONFIG_ARM64_MTE)
# define VM_MTE VM_HIGH_ARCH_0 /* Use Tagged memory for access control */
# define VM_MTE_ALLOWED VM_HIGH_ARCH_1 /* Tagged memory permitted */
# define VM_MTE VM_HIGH_ARCH_4 /* Use Tagged memory for access control */
# define VM_MTE_ALLOWED VM_HIGH_ARCH_5 /* Tagged memory permitted */
#else
# define VM_MTE VM_NONE
# define VM_MTE_ALLOWED VM_NONE
......
......@@ -441,6 +441,7 @@ typedef struct elf64_shdr {
#define NT_ARM_ZA 0x40c /* ARM SME ZA registers */
#define NT_ARM_ZT 0x40d /* ARM SME ZT registers */
#define NT_ARM_FPMR 0x40e /* ARM floating point mode register */
#define NT_ARM_POE 0x40f /* ARM POE registers */
#define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */
#define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */
#define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */
......
......@@ -156,6 +156,12 @@ static void pmull_sigill(void)
asm volatile(".inst 0x0ee0e000" : : : );
}
/*
 * Probe for the POE hwcap entry: reads POR_EL0 into x0 and discards the
 * value. Only the act of executing the instruction matters to the caller.
 */
static void poe_sigill(void)
{
	/* mrs x0, POR_EL0 -- POR_EL0 is spelled by its generic encoding */
	asm volatile("mrs x0, S3_3_C10_C2_4"
		     : /* no outputs */
		     : /* no inputs */
		     : "x0");
}
static void rng_sigill(void)
{
asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0");
......@@ -601,6 +607,14 @@ static const struct hwcap_data {
.cpuinfo = "pmull",
.sigill_fn = pmull_sigill,
},
{
.name = "POE",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_POE,
.cpuinfo = "poe",
.sigill_fn = poe_sigill,
.sigill_reliable = true,
},
{
.name = "RNG",
.at_hwcap = AT_HWCAP2,
......
......@@ -2,6 +2,7 @@
mangle_*
fake_sigreturn_*
fpmr_*
poe_*
sme_*
ssve_*
sve_*
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2023 Arm Limited
*
* Verify that the POR_EL0 register context in signal frames is set up as
* expected.
*/
#include <signal.h>
#include <ucontext.h>
#include <sys/auxv.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <asm/sigcontext.h>
#include "test_signals_utils.h"
#include "testcases.h"
/*
 * Storage for the captured context: a ucontext_t overlaid on a 128 KiB
 * byte buffer. The whole union (sizeof(context)) is handed to
 * get_current_context() as the destination size.
 */
static union {
	ucontext_t uc;
	char buf[128 * 1024];
} context;
/* Generic system-register encoding used to name POR_EL0 in inline asm. */
#define SYS_POR_EL0 "S3_3_C10_C2_4"

/* Return the current value of POR_EL0 as read with an mrs instruction. */
static uint64_t get_por_el0(void)
{
	uint64_t por;

	asm volatile("mrs %0, " SYS_POR_EL0 "\n" : "=r"(por) : : );

	return por;
}
/*
 * Test body: capture the current context and check that a POE_MAGIC record
 * is present in it exactly when AT_HWCAP2 advertises HWCAP2_POE, and that
 * the recorded por_el0 equals the value read just before the capture.
 *
 * The verdict is stored in td->pass. Returns 1 if get_current_context()
 * reports failure, 0 otherwise.
 */
int poe_present(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
{
	struct _aarch64_ctx *resv_head = GET_BUF_RESV_HEAD(context);
	struct poe_context *frame_poe;
	bool hwcap_poe, frame_has_poe;
	__u64 por_before;
	size_t offset;

	/* Only touch POR_EL0 when the hwcap says the register exists. */
	hwcap_poe = getauxval(AT_HWCAP2) & HWCAP2_POE;
	if (hwcap_poe)
		por_before = get_por_el0();

	if (!get_current_context(td, &context.uc, sizeof(context)))
		return 1;

	frame_poe = (struct poe_context *)
		get_header(resv_head, POE_MAGIC, td->live_sz, &offset);
	frame_has_poe = (frame_poe != NULL);

	fprintf(stderr, "POR_EL0 sigframe %s on system %s POE\n",
		frame_has_poe ? "present" : "absent",
		hwcap_poe ? "with" : "without");

	/* The record must be there if and only if the feature is. */
	td->pass = (frame_has_poe == hwcap_poe);

	/*
	 * The value stored in the frame has to be the one that was live
	 * when the context was captured.
	 */
	if (hwcap_poe && frame_poe && frame_poe->por_el0 != por_before) {
		fprintf(stderr, "POR_EL0 in frame is %llx, was %llx\n",
			frame_poe->por_el0, por_before);
		td->pass = false;
	}

	return 0;
}
/* Test descriptor for this test case; poe_present() is its run hook. */
struct tdescr tde = {
	.name		= "POR_EL0",
	.descr		= "Validate that POR_EL0 is present as expected",
	.timeout	= 3,
	.run		= poe_present,
};
......@@ -6,29 +6,6 @@
#include "testcases.h"
/*
 * get_header() - find the first record with a given magic in a chain of
 * _aarch64_ctx records.
 * @head:    first record of the chain (may be NULL)
 * @magic:   magic value to look for; 0 matches the terminator record
 * @resv_sz: size in bytes of the area the chain lives in
 * @offset:  optional; on a match receives the byte offset of the record
 *           from @head
 *
 * Records are walked by adding each record's size to the running offset.
 * A header is only inspected while it lies entirely inside @resv_sz, and
 * the walk gives up on a record whose size is 0, since it could never
 * advance.
 *
 * Return: the matching record, or NULL if there is none.
 */
struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
				size_t resv_sz, size_t *offset)
{
	size_t offs = 0;

	if (!head || resv_sz < HDR_SZ)
		return NULL;

	while (offs <= resv_sz - HDR_SZ) {
		if (head->magic == magic) {
			if (offset)
				*offset = offs;
			return head;
		}

		/* Terminator reached, or a record that would never advance. */
		if (!head->magic || !head->size)
			break;

		offs += head->size;
		head = GET_RESV_NEXT_HEAD(head);
	}

	return NULL;
}
bool validate_extra_context(struct extra_context *extra, char **err,
void **extra_data, size_t *extra_size)
{
......@@ -184,6 +161,10 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
if (head->size != sizeof(struct esr_context))
*err = "Bad size for esr_context";
break;
case POE_MAGIC:
if (head->size != sizeof(struct poe_context))
*err = "Bad size for poe_context";
break;
case TPIDR2_MAGIC:
if (head->size != sizeof(struct tpidr2_context))
*err = "Bad size for tpidr2_context";
......
......@@ -26,6 +26,9 @@
#define HDR_SZ \
sizeof(struct _aarch64_ctx)
/*
 * First _aarch64_ctx record of a ucontext_t, given a POINTER to it.
 * The argument and the whole expansion are parenthesized so the macro can
 * take any pointer expression and be used inside larger expressions.
 */
#define GET_UC_RESV_HEAD(uc) \
	((struct _aarch64_ctx *)(&((uc)->uc_mcontext.__reserved)))
/*
 * First _aarch64_ctx record of a signal frame, given the frame OBJECT
 * (not a pointer). The expansion is fully parenthesized so that
 * GET_SF_RESV_HEAD(sf)->field parses as intended.
 */
#define GET_SF_RESV_HEAD(sf) \
	((struct _aarch64_ctx *)(&(sf).uc.uc_mcontext.__reserved))
......@@ -88,8 +91,29 @@ struct fake_sigframe {
bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err);
struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
size_t resv_sz, size_t *offset);
/*
 * get_header() - find the first record with a given magic in a chain of
 * _aarch64_ctx records.
 * @head:    first record of the chain (may be NULL)
 * @magic:   magic value to look for; 0 matches the terminator record
 * @resv_sz: size in bytes of the area the chain lives in
 * @offset:  optional; on a match receives the byte offset of the record
 *           from @head
 *
 * Records are walked by adding each record's size to the running offset.
 * A header is only inspected while it lies entirely inside @resv_sz, and
 * the walk gives up on a record whose size is 0, since it could never
 * advance.
 *
 * Return: the matching record, or NULL if there is none.
 */
static inline struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
					      size_t resv_sz, size_t *offset)
{
	size_t offs = 0;

	if (!head || resv_sz < HDR_SZ)
		return NULL;

	while (offs <= resv_sz - HDR_SZ) {
		if (head->magic == magic) {
			if (offset)
				*offset = offs;
			return head;
		}

		/* Terminator reached, or a record that would never advance. */
		if (!head->magic || !head->size)
			break;

		offs += head->size;
		head = GET_RESV_NEXT_HEAD(head);
	}

	return NULL;
}
static inline struct _aarch64_ctx *get_terminator(struct _aarch64_ctx *head,
size_t resv_sz,
......
......@@ -40,6 +40,18 @@ static struct feature_id_reg feat_id_regs[] = {
ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
4,
1
},
{
ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */
ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
16,
1
},
{
ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */
ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
16,
1
}
};
......@@ -468,6 +480,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */
ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */
ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */
ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */
ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */
ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */
ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */
......@@ -475,6 +488,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */
ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */
ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */
ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */
ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */
ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */
ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */
......
......@@ -104,7 +104,7 @@ TEST_GEN_FILES += $(BINARIES_64)
endif
else
ifneq (,$(findstring $(ARCH),powerpc))
ifneq (,$(filter $(ARCH),arm64 powerpc))
TEST_GEN_FILES += protection_keys
endif
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2023 Arm Ltd.
*/
#ifndef _PKEYS_ARM64_H
#define _PKEYS_ARM64_H
#include "vm_util.h"
/* for signal frame parsing */
#include "../arm64/signal/testcases/testcases.h"
#ifndef SYS_mprotect_key
# define SYS_mprotect_key 288
#endif
#ifndef SYS_pkey_alloc
# define SYS_pkey_alloc 289
# define SYS_pkey_free 290
#endif
/* Program counter stored in an mcontext object. */
#define MCONTEXT_IP(mc)		((mc).pc)
/* This header has no trap number to report; always evaluates to -1. */
#define MCONTEXT_TRAPNO(mc)	(-1)
#define PKEY_MASK 0xf
#define POE_NONE 0x0
#define POE_X 0x2
#define POE_RX 0x3
#define POE_RWX 0x7
#define NR_PKEYS 8
#define NR_RESERVED_PKEYS 1 /* pkey-0 */
#define PKEY_ALLOW_ALL 0x77777777
#define PKEY_BITS_PER_PKEY 4
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
#undef HPAGE_SIZE
#define HPAGE_SIZE default_huge_page_size()
/* 4-byte instructions * 16384 = 64K page */
#define __page_o_noops() asm(".rept 16384 ; nop; .endr")
/* Read the pkey register (POR_EL0, named by its generic encoding). */
static inline u64 __read_pkey_reg(void)
{
	u64 por_el0 = 0;

	asm volatile("mrs %0, S3_3_c10_c2_4" : "=r" (por_el0));

	return por_el0;
}
/*
 * Write @pkey_reg to the pkey register (POR_EL0), logging the register
 * value before and after the write at debug level 4.
 */
static inline void __write_pkey_reg(u64 pkey_reg)
{
	dprintf4("%s() changing %016llx to %016llx\n",
		 __func__, __read_pkey_reg(), pkey_reg);

	/* msr POR_EL0, <pkey_reg>, followed by an isb in the same asm block */
	asm volatile("msr S3_3_c10_c2_4, %0\nisb" :: "r" (pkey_reg) :);

	dprintf4("%s() pkey register after changing %016llx to %016llx\n",
		 __func__, __read_pkey_reg(), pkey_reg);
}
/*
 * Report whether the CPU supports pkeys. No detection is attempted here
 * (there is no simple way to do it), so support is always claimed.
 */
static inline int cpu_has_pkeys(void)
{
	return 1;
}
/* Bit offset of @pkey's field in the pkey register. */
static inline u32 pkey_bit_position(int pkey)
{
	/* Each key owns PKEY_BITS_PER_PKEY consecutive bits, key 0 lowest. */
	return pkey * PKEY_BITS_PER_PKEY;
}
/* Number of pkeys reserved on this architecture (NR_RESERVED_PKEYS). */
static inline int get_arch_reserved_keys(void)
{
	return NR_RESERVED_PKEYS;
}
/* Intentionally a no-op in this header: both arguments are ignored. */
void expect_fault_on_read_execonly_key(void *p1, int pkey)
{
}
/*
 * Not implemented in this header: always returns PTR_ERR_ENOTSUP and
 * ignores its arguments.
 */
void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
{
	return PTR_ERR_ENOTSUP;
}
#define set_pkey_bits set_pkey_bits
/*
 * Return @reg with @pkey's field replaced according to @flags:
 * PKEY_DISABLE_ACCESS selects POE_X, otherwise PKEY_DISABLE_WRITE selects
 * POE_RX, otherwise the key gets POE_RWX. Fields of other keys are kept.
 */
static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
{
	const u32 field_shift = pkey_bit_position(pkey);
	u64 perm;

	if (flags & PKEY_DISABLE_ACCESS)
		perm = POE_X;
	else if (flags & PKEY_DISABLE_WRITE)
		perm = POE_RX;
	else
		perm = POE_RWX;

	/* Clear the key's old field, then install the new permissions. */
	reg &= ~((u64)PKEY_MASK << field_shift);

	return reg | (perm << field_shift);
}
#define get_pkey_bits get_pkey_bits
/*
 * Translate @pkey's field of @reg into PKEY_DISABLE_* flags:
 * POE_X maps to PKEY_DISABLE_ACCESS, POE_RX to PKEY_DISABLE_WRITE and any
 * other value to 0.
 */
static inline u64 get_pkey_bits(u64 reg, int pkey)
{
	/* Move the key's field down to bit 0 and drop everything above it. */
	u32 perm = (reg >> pkey_bit_position(pkey)) & PKEY_MASK;

	switch (perm) {
	case POE_X:
		return PKEY_DISABLE_ACCESS;
	case POE_RX:
		return PKEY_DISABLE_WRITE;
	default:
		return 0;
	}
}
/*
 * Overwrite the por_el0 value stored in the POE record of a signal
 * context with @pkey. Does nothing if the context has no POE_MAGIC record.
 */
static void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey)
{
	struct _aarch64_ctx *ctx = GET_UC_RESV_HEAD(uctxt);
	/*
	 * The record chain starts at uc_mcontext.__reserved, so bound the
	 * search by the size of that array rather than by the size of the
	 * whole (larger) uc_mcontext.
	 */
	struct poe_context *poe_ctx =
		(struct poe_context *) get_header(ctx, POE_MAGIC,
				sizeof(uctxt->uc_mcontext.__reserved), NULL);

	if (poe_ctx)
		poe_ctx->por_el0 = pkey;
}
#endif /* _PKEYS_ARM64_H */
......@@ -91,12 +91,17 @@ void record_pkey_malloc(void *ptr, long size, int prot);
#include "pkey-x86.h"
#elif defined(__powerpc64__) /* arch */
#include "pkey-powerpc.h"
#elif defined(__aarch64__) /* arch */
#include "pkey-arm64.h"
#else /* arch */
#error Architecture not supported
#endif /* arch */
#ifndef PKEY_MASK
#define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)
#endif
#ifndef set_pkey_bits
static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
{
u32 shift = pkey_bit_position(pkey);
......@@ -106,7 +111,9 @@ static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
reg |= (flags & PKEY_MASK) << shift;
return reg;
}
#endif
#ifndef get_pkey_bits
static inline u64 get_pkey_bits(u64 reg, int pkey)
{
u32 shift = pkey_bit_position(pkey);
......@@ -116,6 +123,7 @@ static inline u64 get_pkey_bits(u64 reg, int pkey)
*/
return ((reg >> shift) & PKEY_MASK);
}
#endif
extern u64 shadow_pkey_reg;
......
......@@ -8,7 +8,10 @@
# define SYS_pkey_free 385
#endif
#define REG_IP_IDX PT_NIP
#define MCONTEXT_IP(mc) mc.gp_regs[REG_IP_IDX]
#define MCONTEXT_TRAPNO(mc) mc.gp_regs[REG_TRAPNO]
#define REG_TRAPNO PT_TRAP
#define MCONTEXT_FPREGS
#define gregs gp_regs
#define fpregs fp_regs
#define si_pkey_offset 0x20
......
......@@ -15,6 +15,10 @@
#endif
#define MCONTEXT_IP(mc) mc.gregs[REG_IP_IDX]
#define MCONTEXT_TRAPNO(mc) mc.gregs[REG_TRAPNO]
#define MCONTEXT_FPREGS
#ifndef PKEY_DISABLE_ACCESS
# define PKEY_DISABLE_ACCESS 0x1
#endif
......
......@@ -147,7 +147,7 @@ void abort_hooks(void)
* will then fault, which makes sure that the fault code handles
* execute-only memory properly.
*/
#ifdef __powerpc64__
#if defined(__powerpc64__) || defined(__aarch64__)
/* This way, both 4K and 64K alignment are maintained */
__attribute__((__aligned__(65536)))
#else
......@@ -212,7 +212,6 @@ void pkey_disable_set(int pkey, int flags)
unsigned long syscall_flags = 0;
int ret;
int pkey_rights;
u64 orig_pkey_reg = read_pkey_reg();
dprintf1("START->%s(%d, 0x%x)\n", __func__,
pkey, flags);
......@@ -242,8 +241,6 @@ void pkey_disable_set(int pkey, int flags)
dprintf1("%s(%d) pkey_reg: 0x%016llx\n",
__func__, pkey, read_pkey_reg());
if (flags)
pkey_assert(read_pkey_reg() >= orig_pkey_reg);
dprintf1("END<---%s(%d, 0x%x)\n", __func__,
pkey, flags);
}
......@@ -253,7 +250,6 @@ void pkey_disable_clear(int pkey, int flags)
unsigned long syscall_flags = 0;
int ret;
int pkey_rights = hw_pkey_get(pkey, syscall_flags);
u64 orig_pkey_reg = read_pkey_reg();
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
......@@ -273,8 +269,6 @@ void pkey_disable_clear(int pkey, int flags)
dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__,
pkey, read_pkey_reg());
if (flags)
assert(read_pkey_reg() <= orig_pkey_reg);
}
void pkey_write_allow(int pkey)
......@@ -314,7 +308,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
ucontext_t *uctxt = vucontext;
int trapno;
unsigned long ip;
#ifdef MCONTEXT_FPREGS
char *fpregs;
#endif
#if defined(__i386__) || defined(__x86_64__) /* arch */
u32 *pkey_reg_ptr;
int pkey_reg_offset;
......@@ -328,9 +324,11 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
__func__, __LINE__,
__read_pkey_reg(), shadow_pkey_reg);
trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
trapno = MCONTEXT_TRAPNO(uctxt->uc_mcontext);
ip = MCONTEXT_IP(uctxt->uc_mcontext);
#ifdef MCONTEXT_FPREGS
fpregs = (char *) uctxt->uc_mcontext.fpregs;
#endif
dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n",
__func__, trapno, ip, si_code_str(si->si_code),
......@@ -359,7 +357,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
#endif /* arch */
dprintf1("siginfo: %p\n", si);
#ifdef MCONTEXT_FPREGS
dprintf1(" fpregs: %p\n", fpregs);
#endif
if ((si->si_code == SEGV_MAPERR) ||
(si->si_code == SEGV_ACCERR) ||
......@@ -389,6 +389,8 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
#elif defined(__powerpc64__) /* arch */
/* restore access and let the faulting instruction continue */
pkey_access_allow(siginfo_pkey);
#elif defined(__aarch64__)
aarch64_write_signal_pkey(uctxt, PKEY_ALLOW_ALL);
#endif /* arch */
pkey_faults++;
dprintf1("<<<<==================================================\n");
......@@ -902,7 +904,9 @@ void expected_pkey_fault(int pkey)
* test program continue. We now have to restore it.
*/
if (__read_pkey_reg() != 0)
#else /* arch */
#elif defined(__aarch64__)
if (__read_pkey_reg() != PKEY_ALLOW_ALL)
#else
if (__read_pkey_reg() != shadow_pkey_reg)
#endif /* arch */
pkey_assert(0);
......@@ -1492,6 +1496,11 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
lots_o_noops_around_write(&scratch);
do_not_expect_pkey_fault("executing on PROT_EXEC memory");
expect_fault_on_read_execonly_key(p1, pkey);
// Reset back to PROT_EXEC | PROT_READ for architectures that support
// non-PKEY execute-only permissions.
ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC | PROT_READ, (u64)pkey);
pkey_assert(!ret);
}
void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
......@@ -1665,6 +1674,84 @@ void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
}
#endif
#if defined(__aarch64__)
/*
 * Check that a tracer can read and write the tracee's pkey register
 * through the NT_ARM_POE regset, and that the tracee observes the value
 * the tracer wrote. Both parameters are unused.
 */
void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
{
	/* Arbitrary value: POE_RWX for key 0, POE_NONE for key 1, POE_X for key 2 */
	const u64 new_por = (POE_X << PKEY_BITS_PER_PKEY * 2) |
			    (POE_NONE << PKEY_BITS_PER_PKEY) |
			    POE_RWX;
	u64 regset_val;
	struct iovec iov = {
		.iov_base = &regset_val,
		.iov_len = sizeof(regset_val),
	};
	int status, ret;
	pid_t child;

	child = fork();
	pkey_assert(child >= 0);
	dprintf3("[%d] fork() ret: %d\n", getpid(), child);

	if (child == 0) {
		/* Tracee side */
		ptrace(PTRACE_TRACEME, 0, 0, 0);

		/* Stop so the tracer can rewrite the pkey register */
		raise(SIGSTOP);

		/*
		 * Use the raw __read_pkey_reg() accessor so that no
		 * shadow_pkey_reg checking takes place.
		 */
		if (__read_pkey_reg() != new_por)
			exit(1);

		raise(SIGSTOP);

		exit(0);
	}

	/* Tracer side: wait for the tracee's first stop */
	pkey_assert(child == waitpid(child, &status, 0));
	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
	pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);

	/* The tracee was forked from us, so its register must match ours */
	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
	pkey_assert(ret == 0);
	pkey_assert(regset_val == read_pkey_reg());

	regset_val = new_por;
	ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_ARM_POE, &iov);
	pkey_assert(ret == 0);

	/* The new value must be readable back before the tracee runs at all */
	memset(&regset_val, 0, sizeof(regset_val));
	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
	pkey_assert(ret == 0);
	pkey_assert(regset_val == new_por);

	/* Let the tracee run */
	ret = ptrace(PTRACE_CONT, child, 0, 0);
	pkey_assert(ret == 0);

	/* A second SIGSTOP means the tracee saw the value we wrote */
	pkey_assert(child == waitpid(child, &status, 0));
	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
	pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);

	/* ... and the value must still be readable after it has executed */
	memset(&regset_val, 0, sizeof(regset_val));
	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
	pkey_assert(ret == 0);
	pkey_assert(regset_val == new_por);

	ret = ptrace(PTRACE_CONT, child, 0, 0);
	pkey_assert(ret == 0);

	/* The tracee must now exit cleanly */
	pkey_assert(child == waitpid(child, &status, 0));
	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
	pkey_assert(WIFEXITED(status));
	pkey_assert(WEXITSTATUS(status) == 0);
}
#endif
void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
{
int size = PAGE_SIZE;
......@@ -1700,7 +1787,7 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
test_pkey_syscalls_bad_args,
test_pkey_alloc_exhaust,
test_pkey_alloc_free_attach_pkey0,
#if defined(__i386__) || defined(__x86_64__)
#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
test_ptrace_modifies_pkru,
#endif
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment