Commit 0c5ade74 authored by Catalin Marinas

Merge branches 'for-next/reorg-va-space', 'for-next/rust-for-arm64', 'for-next/misc', 'for-next/daif-cleanup', 'for-next/kselftest', 'for-next/documentation', 'for-next/sysreg' and 'for-next/dpisa', remote-tracking branch 'arm64/for-next/perf' into for-next/core

* arm64/for-next/perf: (39 commits)
  docs: perf: Fix build warning of hisi-pcie-pmu.rst
  perf: starfive: Only allow COMPILE_TEST for 64-bit architectures
  MAINTAINERS: Add entry for StarFive StarLink PMU
  docs: perf: Add description for StarFive's StarLink PMU
  dt-bindings: perf: starfive: Add JH8100 StarLink PMU
  perf: starfive: Add StarLink PMU support
  docs: perf: Update usage for target filter of hisi-pcie-pmu
  drivers/perf: hisi_pcie: Merge find_related_event() and get_event_idx()
  drivers/perf: hisi_pcie: Relax the check on related events
  drivers/perf: hisi_pcie: Check the target filter properly
  drivers/perf: hisi_pcie: Add more events for counting TLP bandwidth
  drivers/perf: hisi_pcie: Fix incorrect counting under metric mode
  drivers/perf: hisi_pcie: Introduce hisi_pcie_pmu_get_event_ctrl_val()
  drivers/perf: hisi_pcie: Rename hisi_pcie_pmu_{config,clear}_filter()
  drivers/perf: hisi: Enable HiSilicon Erratum 162700402 quirk for HIP09
  perf/arm_cspmu: Add devicetree support
  dt-bindings/perf: Add Arm CoreSight PMU
  perf/arm_cspmu: Simplify counter reset
  perf/arm_cspmu: Simplify attribute groups
  perf/arm_cspmu: Simplify initialisation
  ...

* for-next/reorg-va-space:
  : Reorganise the arm64 kernel VA space in preparation for LPA2 support
  : (52-bit VA/PA).
  arm64: kaslr: Adjust randomization range dynamically
  arm64: mm: Reclaim unused vmemmap region for vmalloc use
  arm64: vmemmap: Avoid base2 order of struct page size to dimension region
  arm64: ptdump: Discover start of vmemmap region at runtime
  arm64: ptdump: Allow all region boundaries to be defined at boot time
  arm64: mm: Move fixmap region above vmemmap region
  arm64: mm: Move PCI I/O emulation region above the vmemmap region

* for-next/rust-for-arm64:
  : Enable Rust support for arm64
  arm64: rust: Enable Rust support for AArch64
  rust: Refactor the build target to allow the use of builtin targets

* for-next/misc:
  : Miscellaneous arm64 patches
  ARM64: Dynamically allocate cpumasks and increase supported CPUs to 512
  arm64: Remove enable_daif macro
  arm64/hw_breakpoint: Directly use ESR_ELx_WNR for a watchpoint exception
  arm64: cpufeatures: Clean up temporary variable to simplify code
  arm64: Update setup_arch() comment on interrupt masking
  arm64: remove unnecessary ifdefs around is_compat_task()
  arm64: ftrace: Don't forbid CALL_OPS+CC_OPTIMIZE_FOR_SIZE with Clang
  arm64/sme: Ensure that all fields in SMCR_EL1 are set to known values
  arm64/sve: Ensure that all fields in ZCR_EL1 are set to known values
  arm64/sve: Document that __SVE_VQ_MAX is much larger than needed
  arm64: make member of struct pt_regs and its offset macro in the same order
  arm64: remove unneeded BUILD_BUG_ON assertion
  arm64: kretprobes: acquire the regs via a BRK exception
  arm64: io: permit offset addressing
  arm64: errata: Don't enable workarounds for "rare" errata by default

* for-next/daif-cleanup:
  : Clean up DAIF handling for EL0 returns
  arm64: Unmask Debug + SError in do_notify_resume()
  arm64: Move do_notify_resume() to entry-common.c
  arm64: Simplify do_notify_resume() DAIF masking

* for-next/kselftest:
  : Miscellaneous arm64 kselftest patches
  kselftest/arm64: Test that ptrace takes effect in the target process

* for-next/documentation:
  : arm64 documentation patches
  arm64/sme: Remove spurious 'is' in SME documentation
  arm64/fp: Clarify effect of setting an unsupported system VL
  arm64/sme: Fix cut'n'paste in ABI document
  arm64/sve: Remove bitrotted comment about syscall behaviour

* for-next/sysreg:
  : sysreg updates
  arm64/sysreg: Update ID_AA64DFR0_EL1 register
  arm64/sysreg: Update ID_DFR0_EL1 register fields
  arm64/sysreg: Add register fields for ID_AA64DFR1_EL1

* for-next/dpisa:
  : Support for 2023 dpISA extensions
  kselftest/arm64: Add 2023 DPISA hwcap test coverage
  kselftest/arm64: Add basic FPMR test
  kselftest/arm64: Handle FPMR context in generic signal frame parser
  arm64/hwcap: Define hwcaps for 2023 DPISA features
  arm64/ptrace: Expose FPMR via ptrace
  arm64/signal: Add FPMR signal handling
  arm64/fpsimd: Support FEAT_FPMR
  arm64/fpsimd: Enable host kernel access to FPMR
  arm64/cpufeature: Hook new identification registers up to cpufeature
......@@ -317,6 +317,55 @@ HWCAP2_LRCPC3
HWCAP2_LSE128
Functionality implied by ID_AA64ISAR0_EL1.Atomic == 0b0011.
HWCAP2_FPMR
Functionality implied by ID_AA64PFR2_EL1.FPMR == 0b0001.
HWCAP2_LUT
Functionality implied by ID_AA64ISAR2_EL1.LUT == 0b0001.
HWCAP2_FAMINMAX
Functionality implied by ID_AA64ISAR3_EL1.FAMINMAX == 0b0001.
HWCAP2_F8CVT
Functionality implied by ID_AA64FPFR0_EL1.F8CVT == 0b1.
HWCAP2_F8FMA
Functionality implied by ID_AA64FPFR0_EL1.F8FMA == 0b1.
HWCAP2_F8DP4
Functionality implied by ID_AA64FPFR0_EL1.F8DP4 == 0b1.
HWCAP2_F8DP2
Functionality implied by ID_AA64FPFR0_EL1.F8DP2 == 0b1.
HWCAP2_F8E4M3
Functionality implied by ID_AA64FPFR0_EL1.F8E4M3 == 0b1.
HWCAP2_F8E5M2
Functionality implied by ID_AA64FPFR0_EL1.F8E5M2 == 0b1.
HWCAP2_SME_LUTV2
Functionality implied by ID_AA64SMFR0_EL1.LUTv2 == 0b1.
HWCAP2_SME_F8F16
Functionality implied by ID_AA64SMFR0_EL1.F8F16 == 0b1.
HWCAP2_SME_F8F32
Functionality implied by ID_AA64SMFR0_EL1.F8F32 == 0b1.
HWCAP2_SME_SF8FMA
Functionality implied by ID_AA64SMFR0_EL1.SF8FMA == 0b1.
HWCAP2_SME_SF8DP4
Functionality implied by ID_AA64SMFR0_EL1.SF8DP4 == 0b1.
HWCAP2_SME_SF8DP2
Functionality implied by ID_AA64SMFR0_EL1.SF8DP2 == 0b1.
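
The new hwcaps above follow the existing convention: userspace should test the relevant AT_HWCAP2 bit via getauxval(3) before executing any of the corresponding instructions. A minimal sketch, assuming a libc that provides getauxval() and the updated asm/hwcap.h; the printed strings are purely illustrative:

/* Illustrative userspace check, not part of this series. */
#include <stdio.h>
#include <sys/auxv.h>
#include <asm/hwcap.h>

int main(void)
{
	unsigned long hwcap2 = getauxval(AT_HWCAP2);

	if (hwcap2 & HWCAP2_FPMR)
		printf("FPMR accessible from EL0\n");
	if (hwcap2 & HWCAP2_F8CVT)
		printf("FP8 conversion instructions implemented\n");
	if (hwcap2 & HWCAP2_SME_LUTV2)
		printf("SME LUTv2 instructions implemented\n");

	return 0;
}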
4. Unused AT_HWCAP bits
-----------------------
......
......@@ -35,8 +35,9 @@ can be triggered by Linux).
For software workarounds that may adversely impact systems unaffected by
the erratum in question, a Kconfig entry is added under "Kernel
Features" -> "ARM errata workarounds via the alternatives framework".
These are enabled by default and patched in at runtime when an affected
CPU is detected. For less-intrusive workarounds, a Kconfig option is not
With the exception of workarounds for errata deemed "rare" by Arm, these
are enabled by default and patched in at runtime when an affected CPU is
detected. For less-intrusive workarounds, a Kconfig option is not
available and the code is structured (preferably with a comment) in such
a way that the erratum will not be hit.
......
......@@ -75,7 +75,7 @@ model features for SME is included in Appendix A.
2. Vector lengths
------------------
SME defines a second vector length similar to the SVE vector length which is
SME defines a second vector length similar to the SVE vector length which
controls the size of the streaming mode SVE vectors and the ZA matrix array.
The ZA matrix is square with each side having as many bytes as a streaming
mode SVE vector.
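
As a worked example (an aside, not from the document itself): with a streaming vector length of 32 bytes (256 bits), ZA is a 32x32 byte array, i.e. 1024 bytes in total. A trivial sketch of that arithmetic:

/* Illustration only: ZA occupies SVL * SVL bytes for a streaming VL of SVL bytes. */
static inline unsigned int za_size_bytes(unsigned int svl_bytes)
{
	return svl_bytes * svl_bytes;	/* e.g. 32 * 32 = 1024 */
}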
......@@ -238,12 +238,12 @@ prctl(PR_SME_SET_VL, unsigned long arg)
bits of Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
unspecified, including both streaming and non-streaming SVE state.
Calling PR_SME_SET_VL with vl equal to the thread's current vector
length, or calling PR_SME_SET_VL with the PR_SVE_SET_VL_ONEXEC flag,
length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
does not constitute a change to the vector length for this purpose.
* Changing the vector length causes PSTATE.ZA and PSTATE.SM to be cleared.
Calling PR_SME_SET_VL with vl equal to the thread's current vector
length, or calling PR_SME_SET_VL with the PR_SVE_SET_VL_ONEXEC flag,
length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
does not constitute a change to the vector length for this purpose.
......@@ -379,9 +379,8 @@ The regset data starts with struct user_za_header, containing:
/proc/sys/abi/sme_default_vector_length
Writing the text representation of an integer to this file sets the system
default vector length to the specified value, unless the value is greater
than the maximum vector length supported by the system in which case the
default vector length is set to that maximum.
default vector length to the specified value rounded to a supported value
using the same rules as for setting vector length via PR_SME_SET_VL.
The result can be determined by reopening the file and reading its
contents.
......
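
A hedged userspace sketch of the behaviour described above (illustration only; writing the sysctl needs privilege, and the requested value of 4096 is deliberately larger than any supported vector length so the rounding is visible):

/* Illustrative sketch only. */
#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/abi/sme_default_vector_length";
	unsigned int requested = 4096, actual = 0;
	FILE *f;

	f = fopen(path, "w");
	if (!f)
		return 1;
	fprintf(f, "%u\n", requested);
	fclose(f);

	/* Reopen and read back the value the kernel actually applied. */
	f = fopen(path, "r");
	if (!f)
		return 1;
	if (fscanf(f, "%u", &actual) != 1)
		actual = 0;
	fclose(f);

	printf("requested %u, kernel set %u\n", requested, actual);
	return 0;
}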
......@@ -117,11 +117,6 @@ the SVE instruction set architecture.
* The SVE registers are not used to pass arguments to or receive results from
any syscall.
* In practice the affected registers/bits will be preserved or will be replaced
with zeros on return from a syscall, but userspace should not make
assumptions about this. The kernel behaviour may vary on a case-by-case
basis.
* All other SVE state of a thread, including the currently configured vector
length, the state of the PR_SVE_VL_INHERIT flag, and the deferred vector
length (if any), is preserved across all syscalls, subject to the specific
......@@ -428,9 +423,8 @@ The regset data starts with struct user_sve_header, containing:
/proc/sys/abi/sve_default_vector_length
Writing the text representation of an integer to this file sets the system
default vector length to the specified value, unless the value is greater
than the maximum vector length supported by the system in which case the
default vector length is set to that maximum.
default vector length to the specified value rounded to a supported value
using the same rules as for setting vector length via PR_SVE_SET_VL.
The result can be determined by reopening the file and reading its
contents.
......
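
The equivalent per-thread interface is prctl(PR_SVE_SET_VL), which applies the same rounding and returns the configuration actually in effect. A minimal sketch, assuming a libc whose sys/prctl.h pulls in the PR_SVE_* constants:

/* Illustrative sketch only. */
#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	/* 104 is not a supported VL; the kernel rounds it for us. */
	int ret = prctl(PR_SVE_SET_VL, 104);

	if (ret < 0)
		return 1;

	printf("configured SVE VL: %d bytes\n", ret & PR_SVE_VL_LEN_MASK);
	return 0;
}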
......@@ -15,6 +15,7 @@ support corresponds to ``S`` values in the ``MAINTAINERS`` file.
============= ================ ==============================================
Architecture Level of support Constraints
============= ================ ==============================================
``arm64`` Maintained Little Endian only.
``loongarch`` Maintained -
``um`` Maintained ``x86_64`` only.
``x86`` Maintained ``x86_64`` only.
......
......@@ -561,7 +561,6 @@ KBUILD_CFLAGS += -fno-strict-aliasing
KBUILD_CPPFLAGS := -D__KERNEL__
KBUILD_RUSTFLAGS := $(rust_common_flags) \
--target=$(objtree)/scripts/target.json \
-Cpanic=abort -Cembed-bitcode=n -Clto=n \
-Cforce-unwind-tables=n -Ccodegen-units=1 \
-Csymbol-mangling-version=v0 \
......
......@@ -120,6 +120,7 @@ config ARM64
select CLONE_BACKWARDS
select COMMON_CLK
select CPU_PM if (SUSPEND || CPU_IDLE)
select CPUMASK_OFFSTACK if NR_CPUS > 256
select CRC32
select DCACHE_WORD_ACCESS
select DYNAMIC_FTRACE if FUNCTION_TRACER
......@@ -198,7 +199,7 @@ config ARM64
if DYNAMIC_FTRACE_WITH_ARGS && DYNAMIC_FTRACE_WITH_CALL_OPS
select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS \
if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG && \
!CC_OPTIMIZE_FOR_SIZE)
(CC_IS_CLANG || !CC_OPTIMIZE_FOR_SIZE))
select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \
if DYNAMIC_FTRACE_WITH_ARGS
select HAVE_SAMPLE_FTRACE_DIRECT
......@@ -229,6 +230,7 @@ config ARM64
select HAVE_FUNCTION_ARG_ACCESS_API
select MMU_GATHER_RCU_TABLE_FREE
select HAVE_RSEQ
select HAVE_RUST if CPU_LITTLE_ENDIAN
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_KPROBES
......@@ -547,9 +549,8 @@ config ARM64_ERRATUM_832075
If unsure, say Y.
config ARM64_ERRATUM_834220
bool "Cortex-A57: 834220: Stage 2 translation fault might be incorrectly reported in presence of a Stage 1 fault"
bool "Cortex-A57: 834220: Stage 2 translation fault might be incorrectly reported in presence of a Stage 1 fault (rare)"
depends on KVM
default y
help
This option adds an alternative code sequence to work around ARM
erratum 834220 on Cortex-A57 parts up to r1p2.
......@@ -565,7 +566,7 @@ config ARM64_ERRATUM_834220
as it depends on the alternative framework, which will only patch
the kernel if an affected CPU is detected.
If unsure, say Y.
If unsure, say N.
config ARM64_ERRATUM_1742098
bool "Cortex-A57/A72: 1742098: ELR recorded incorrectly on interrupt taken between cryptographic instructions in a sequence"
......@@ -692,8 +693,7 @@ config ARM64_WORKAROUND_REPEAT_TLBI
bool
config ARM64_ERRATUM_2441007
bool "Cortex-A55: Completion of affected memory accesses might not be guaranteed by completion of a TLBI"
default y
bool "Cortex-A55: Completion of affected memory accesses might not be guaranteed by completion of a TLBI (rare)"
select ARM64_WORKAROUND_REPEAT_TLBI
help
This option adds a workaround for ARM Cortex-A55 erratum #2441007.
......@@ -706,11 +706,10 @@ config ARM64_ERRATUM_2441007
Work around this by adding the affected CPUs to the list that needs
TLB sequences to be done twice.
If unsure, say Y.
If unsure, say N.
config ARM64_ERRATUM_1286807
bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation"
default y
bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation (rare)"
select ARM64_WORKAROUND_REPEAT_TLBI
help
This option adds a workaround for ARM Cortex-A76 erratum 1286807.
......@@ -724,6 +723,8 @@ config ARM64_ERRATUM_1286807
invalidated has been observed by other observers. The
workaround repeats the TLBI+DSB operation.
If unsure, say N.
config ARM64_ERRATUM_1463225
bool "Cortex-A76: Software Step might prevent interrupt recognition"
default y
......@@ -743,8 +744,7 @@ config ARM64_ERRATUM_1463225
If unsure, say Y.
config ARM64_ERRATUM_1542419
bool "Neoverse-N1: workaround mis-ordering of instruction fetches"
default y
bool "Neoverse-N1: workaround mis-ordering of instruction fetches (rare)"
help
This option adds a workaround for ARM Neoverse-N1 erratum
1542419.
......@@ -756,7 +756,7 @@ config ARM64_ERRATUM_1542419
Workaround the issue by hiding the DIC feature from EL0. This
forces user-space to perform cache maintenance.
If unsure, say Y.
If unsure, say N.
config ARM64_ERRATUM_1508412
bool "Cortex-A77: 1508412: workaround deadlock on sequence of NC/Device load and store exclusive or PAR read"
......@@ -931,8 +931,7 @@ config ARM64_ERRATUM_2224489
If unsure, say Y.
config ARM64_ERRATUM_2441009
bool "Cortex-A510: Completion of affected memory accesses might not be guaranteed by completion of a TLBI"
default y
bool "Cortex-A510: Completion of affected memory accesses might not be guaranteed by completion of a TLBI (rare)"
select ARM64_WORKAROUND_REPEAT_TLBI
help
This option adds a workaround for ARM Cortex-A510 erratum #2441009.
......@@ -945,7 +944,7 @@ config ARM64_ERRATUM_2441009
Work around this by adding the affected CPUs to the list that needs
TLB sequences to be done twice.
If unsure, say Y.
If unsure, say N.
config ARM64_ERRATUM_2064142
bool "Cortex-A510: 2064142: workaround TRBE register writes while disabled"
......@@ -1427,7 +1426,7 @@ config SCHED_SMT
config NR_CPUS
int "Maximum number of CPUs (2-4096)"
range 2 4096
default "256"
default "512"
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
......
......@@ -41,6 +41,8 @@ KBUILD_CFLAGS += -mgeneral-regs-only \
KBUILD_CFLAGS += $(call cc-disable-warning, psabi)
KBUILD_AFLAGS += $(compat_vdso)
KBUILD_RUSTFLAGS += --target=aarch64-unknown-none -Ctarget-feature="-neon"
KBUILD_CFLAGS += $(call cc-option,-mabi=lp64)
KBUILD_AFLAGS += $(call cc-option,-mabi=lp64)
......@@ -65,7 +67,9 @@ endif
ifeq ($(CONFIG_ARM64_BTI_KERNEL),y)
KBUILD_CFLAGS += -mbranch-protection=pac-ret+bti
KBUILD_RUSTFLAGS += -Zbranch-protection=bti,pac-ret
else ifeq ($(CONFIG_ARM64_PTR_AUTH_KERNEL),y)
KBUILD_RUSTFLAGS += -Zbranch-protection=pac-ret
ifeq ($(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET),y)
KBUILD_CFLAGS += -mbranch-protection=pac-ret
else
......
......@@ -38,10 +38,6 @@
msr daifset, #0xf
.endm
.macro enable_daif
msr daifclr, #0xf
.endm
/*
* Save/restore interrupts.
*/
......
......@@ -11,6 +11,7 @@
* 0x004: for installing kprobes
* 0x005: for installing uprobes
* 0x006: for kprobe software single-step
* 0x007: for kretprobe return
* Allowed values for kgdb are 0x400 - 0x7ff
* 0x100: for triggering a fault on purpose (reserved)
* 0x400: for dynamic BRK instruction
......@@ -23,6 +24,7 @@
#define KPROBES_BRK_IMM 0x004
#define UPROBES_BRK_IMM 0x005
#define KPROBES_BRK_SS_IMM 0x006
#define KRETPROBES_BRK_IMM 0x007
#define FAULT_BRK_IMM 0x100
#define KGDB_DYN_DBG_BRK_IMM 0x400
#define KGDB_COMPILED_DBG_BRK_IMM 0x401
......
......@@ -52,14 +52,17 @@ struct cpuinfo_arm64 {
u64 reg_id_aa64isar0;
u64 reg_id_aa64isar1;
u64 reg_id_aa64isar2;
u64 reg_id_aa64isar3;
u64 reg_id_aa64mmfr0;
u64 reg_id_aa64mmfr1;
u64 reg_id_aa64mmfr2;
u64 reg_id_aa64mmfr3;
u64 reg_id_aa64pfr0;
u64 reg_id_aa64pfr1;
u64 reg_id_aa64pfr2;
u64 reg_id_aa64zfr0;
u64 reg_id_aa64smfr0;
u64 reg_id_aa64fpfr0;
struct cpuinfo_32bit aarch32;
};
......
......@@ -768,6 +768,11 @@ static __always_inline bool system_supports_tpidr2(void)
return system_supports_sme();
}
static __always_inline bool system_supports_fpmr(void)
{
return alternative_has_cap_unlikely(ARM64_HAS_FPMR);
}
static __always_inline bool system_supports_cnp(void)
{
return alternative_has_cap_unlikely(ARM64_HAS_CNP);
......
......@@ -201,16 +201,16 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
#define COMPAT_ELF_PLATFORM ("v8l")
#endif
#ifdef CONFIG_COMPAT
/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */
#define COMPAT_ELF_ET_DYN_BASE 0x000400000UL
/* AArch32 registers. */
#define COMPAT_ELF_NGREG 18
typedef unsigned int compat_elf_greg_t;
typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
#ifdef CONFIG_COMPAT
/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */
#define COMPAT_ELF_ET_DYN_BASE 0x000400000UL
/* AArch32 EABI. */
#define EF_ARM_EABI_MASK 0xff000000
int compat_elf_check_arch(const struct elf32_hdr *);
......
......@@ -74,7 +74,7 @@ void do_el0_fpac(struct pt_regs *regs, unsigned long esr);
void do_el1_fpac(struct pt_regs *regs, unsigned long esr);
void do_el0_mops(struct pt_regs *regs, unsigned long esr);
void do_serror(struct pt_regs *regs, unsigned long esr);
void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags);
void do_signal(struct pt_regs *regs);
void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far);
#endif /* __ASM_EXCEPTION_H */
......@@ -21,7 +21,6 @@
#include <linux/stddef.h>
#include <linux/types.h>
#ifdef CONFIG_COMPAT
/* Masks for extracting the FPSR and FPCR from the FPSCR */
#define VFP_FPSCR_STAT_MASK 0xf800009f
#define VFP_FPSCR_CTRL_MASK 0x07f79f00
......@@ -30,7 +29,6 @@
* control/status register.
*/
#define VFP_STATE_SIZE ((32 * 8) + 4)
#endif
static inline unsigned long cpacr_save_enable_kernel_sve(void)
{
......@@ -89,6 +87,7 @@ struct cpu_fp_state {
void *sve_state;
void *sme_state;
u64 *svcr;
u64 *fpmr;
unsigned int sve_vl;
unsigned int sme_vl;
enum fp_type *fp_type;
......@@ -154,6 +153,7 @@ extern void cpu_enable_sve(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_sme(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_sme2(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_fa64(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__unused);
extern u64 read_smcr_features(void);
......
......@@ -59,7 +59,6 @@ static inline void decode_ctrl_reg(u32 reg,
/* Watchpoints */
#define ARM_BREAKPOINT_LOAD 1
#define ARM_BREAKPOINT_STORE 2
#define AARCH64_ESR_ACCESS_MASK (1 << 6)
/* Lengths */
#define ARM_BREAKPOINT_LEN_1 0x1
......
......@@ -142,6 +142,21 @@
#define KERNEL_HWCAP_SVE_B16B16 __khwcap2_feature(SVE_B16B16)
#define KERNEL_HWCAP_LRCPC3 __khwcap2_feature(LRCPC3)
#define KERNEL_HWCAP_LSE128 __khwcap2_feature(LSE128)
#define KERNEL_HWCAP_FPMR __khwcap2_feature(FPMR)
#define KERNEL_HWCAP_LUT __khwcap2_feature(LUT)
#define KERNEL_HWCAP_FAMINMAX __khwcap2_feature(FAMINMAX)
#define KERNEL_HWCAP_F8CVT __khwcap2_feature(F8CVT)
#define KERNEL_HWCAP_F8FMA __khwcap2_feature(F8FMA)
#define KERNEL_HWCAP_F8DP4 __khwcap2_feature(F8DP4)
#define KERNEL_HWCAP_F8DP2 __khwcap2_feature(F8DP2)
#define KERNEL_HWCAP_F8E4M3 __khwcap2_feature(F8E4M3)
#define KERNEL_HWCAP_F8E5M2 __khwcap2_feature(F8E5M2)
#define KERNEL_HWCAP_SME_LUTV2 __khwcap2_feature(SME_LUTV2)
#define KERNEL_HWCAP_SME_F8F16 __khwcap2_feature(SME_F8F16)
#define KERNEL_HWCAP_SME_F8F32 __khwcap2_feature(SME_F8F32)
#define KERNEL_HWCAP_SME_SF8FMA __khwcap2_feature(SME_SF8FMA)
#define KERNEL_HWCAP_SME_SF8DP4 __khwcap2_feature(SME_SF8DP4)
#define KERNEL_HWCAP_SME_SF8DP2 __khwcap2_feature(SME_SF8DP2)
/*
* This yields a mask that user programs can use to figure out what
......
......@@ -24,25 +24,29 @@
#define __raw_writeb __raw_writeb
static __always_inline void __raw_writeb(u8 val, volatile void __iomem *addr)
{
asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr));
volatile u8 __iomem *ptr = addr;
asm volatile("strb %w0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_writew __raw_writew
static __always_inline void __raw_writew(u16 val, volatile void __iomem *addr)
{
asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr));
volatile u16 __iomem *ptr = addr;
asm volatile("strh %w0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_writel __raw_writel
static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr)
{
asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
volatile u32 __iomem *ptr = addr;
asm volatile("str %w0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_writeq __raw_writeq
static __always_inline void __raw_writeq(u64 val, volatile void __iomem *addr)
{
asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr));
volatile u64 __iomem *ptr = addr;
asm volatile("str %x0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_readb __raw_readb
......
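
Roughly speaking (an illustration, not part of the patch): with the old "r" constraint every MMIO store needed its full address materialised in a register, while the "Qo" constraint above lets the compiler fold a small immediate offset into the str itself, so back-to-back accesses to neighbouring registers no longer need separate address arithmetic:

/* Hypothetical device with two 32-bit registers at offsets 0x0 and 0x4. */
static void example_writes(void __iomem *base)
{
	/* May now compile to str w, [x, #0] and str w, [x, #4]. */
	__raw_writel(0x1, base);
	__raw_writel(0x2, base + 4);
}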
......@@ -105,7 +105,7 @@
#define HCRX_GUEST_FLAGS \
(HCRX_EL2_SMPME | HCRX_EL2_TCR2En | \
(cpus_have_final_cap(ARM64_HAS_MOPS) ? (HCRX_EL2_MSCEn | HCRX_EL2_MCE2) : 0))
#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En)
#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
/* TCR_EL2 Registers bits */
#define TCR_EL2_DS (1UL << 32)
......
......@@ -543,6 +543,7 @@ struct kvm_vcpu_arch {
enum fp_type fp_type;
unsigned int sve_max_vl;
u64 svcr;
u64 fpmr;
/* Stage 2 paging state used by the hardware on next switch */
struct kvm_s2_mmu *hw_mmu;
......
......@@ -30,8 +30,8 @@
* keep a constant PAGE_OFFSET and "fallback" to using the higher end
* of the VMEMMAP where 52-bit support is not available in hardware.
*/
#define VMEMMAP_SHIFT (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT)
#define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) >> VMEMMAP_SHIFT)
#define VMEMMAP_RANGE (_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET)
#define VMEMMAP_SIZE ((VMEMMAP_RANGE >> PAGE_SHIFT) * sizeof(struct page))
/*
* PAGE_OFFSET - the virtual address of the start of the linear map, at the
......@@ -47,11 +47,11 @@
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
#define MODULES_VADDR (_PAGE_END(VA_BITS_MIN))
#define MODULES_VSIZE (SZ_2G)
#define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
#define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
#define PCI_IO_END (VMEMMAP_START - SZ_8M)
#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
#define FIXADDR_TOP (VMEMMAP_START - SZ_32M)
#define VMEMMAP_START (VMEMMAP_END - VMEMMAP_SIZE)
#define VMEMMAP_END (-UL(SZ_1G))
#define PCI_IO_START (VMEMMAP_END + SZ_8M)
#define PCI_IO_END (PCI_IO_START + PCI_IO_SIZE)
#define FIXADDR_TOP (-UL(SZ_8M))
#if VA_BITS > 48
#define VA_BITS_MIN (48)
......
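
A worked example of the new sizing (illustration only, assuming 4K pages, VA_BITS_MIN == 48 and sizeof(struct page) == 64):

/*
 * VMEMMAP_RANGE = _PAGE_END(48) - PAGE_OFFSET = 1UL << 47   (half the kernel VA space)
 * VMEMMAP_SIZE  = (VMEMMAP_RANGE >> 12) * 64  = 1UL << 41   (2 TiB of vmemmap)
 */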
......@@ -18,11 +18,15 @@
* VMALLOC range.
*
* VMALLOC_START: beginning of the kernel vmalloc space
* VMALLOC_END: extends to the available space below vmemmap, PCI I/O space
* and fixed mappings
* VMALLOC_END: extends to the available space below vmemmap
*/
#define VMALLOC_START (MODULES_END)
#define VMALLOC_END (VMEMMAP_START - SZ_256M)
#if VA_BITS == VA_BITS_MIN
#define VMALLOC_END (VMEMMAP_START - SZ_8M)
#else
#define VMEMMAP_UNUSED_NPAGES ((_PAGE_OFFSET(vabits_actual) - PAGE_OFFSET) >> PAGE_SHIFT)
#define VMALLOC_END (VMEMMAP_START + VMEMMAP_UNUSED_NPAGES * sizeof(struct page) - SZ_8M)
#endif
#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
......
......@@ -155,6 +155,8 @@ struct thread_struct {
struct {
unsigned long tp_value; /* TLS register */
unsigned long tp2_value;
u64 fpmr;
unsigned long pad;
struct user_fpsimd_state fpsimd_state;
} uw;
......@@ -253,6 +255,8 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
BUILD_BUG_ON(sizeof_field(struct thread_struct, uw) !=
sizeof_field(struct thread_struct, uw.tp_value) +
sizeof_field(struct thread_struct, uw.tp2_value) +
sizeof_field(struct thread_struct, uw.fpmr) +
sizeof_field(struct thread_struct, uw.pad) +
sizeof_field(struct thread_struct, uw.fpsimd_state));
*offset = offsetof(struct thread_struct, uw);
......
......@@ -107,5 +107,20 @@
#define HWCAP2_SVE_B16B16 (1UL << 45)
#define HWCAP2_LRCPC3 (1UL << 46)
#define HWCAP2_LSE128 (1UL << 47)
#define HWCAP2_FPMR (1UL << 48)
#define HWCAP2_LUT (1UL << 49)
#define HWCAP2_FAMINMAX (1UL << 50)
#define HWCAP2_F8CVT (1UL << 51)
#define HWCAP2_F8FMA (1UL << 52)
#define HWCAP2_F8DP4 (1UL << 53)
#define HWCAP2_F8DP2 (1UL << 54)
#define HWCAP2_F8E4M3 (1UL << 55)
#define HWCAP2_F8E5M2 (1UL << 56)
#define HWCAP2_SME_LUTV2 (1UL << 57)
#define HWCAP2_SME_F8F16 (1UL << 58)
#define HWCAP2_SME_F8F32 (1UL << 59)
#define HWCAP2_SME_SF8FMA (1UL << 60)
#define HWCAP2_SME_SF8DP4 (1UL << 61)
#define HWCAP2_SME_SF8DP2 (1UL << 62)
#endif /* _UAPI__ASM_HWCAP_H */
......@@ -152,6 +152,14 @@ struct tpidr2_context {
__u64 tpidr2;
};
/* FPMR context */
#define FPMR_MAGIC 0x46504d52
struct fpmr_context {
struct _aarch64_ctx head;
__u64 fpmr;
};
#define ZA_MAGIC 0x54366345
struct za_context {
......
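
A hedged sketch of how userspace might locate the new record in a delivered signal frame, walking the __reserved area exactly as for the other magic values (EXTRA_MAGIC handling and bounds checks omitted for brevity):

/* Illustrative sketch; EXTRA_MAGIC records are not handled. */
#include <asm/sigcontext.h>

static __u64 find_fpmr(struct sigcontext *sc)
{
	struct _aarch64_ctx *head = (struct _aarch64_ctx *)sc->__reserved;

	while (head->magic) {
		if (head->magic == FPMR_MAGIC)
			return ((struct fpmr_context *)head)->fpmr;
		head = (struct _aarch64_ctx *)((char *)head + head->size);
	}

	return 0;	/* no FPMR record present */
}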
......@@ -13,6 +13,17 @@
#define __SVE_VQ_BYTES 16 /* number of bytes per quadword */
/*
* Yes, __SVE_VQ_MAX is 512 QUADWORDS.
*
* To help ensure forward portability, this is much larger than the
* current maximum value defined by the SVE architecture. While arrays
* or static allocations can be sized based on this value, watch out!
* It will waste a surprisingly large amount of memory.
*
* Dynamic sizing based on the actual runtime vector length is likely to
* be preferable for most purposes.
*/
#define __SVE_VQ_MIN 1
#define __SVE_VQ_MAX 512
......
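
A sketch of the dynamic sizing the comment recommends, using the prctl() interface from sve.rst rather than the 8 KiB-per-register worst case implied by __SVE_VQ_MAX * __SVE_VQ_BYTES:

/* Illustrative sketch only. */
#include <stddef.h>
#include <sys/prctl.h>

/* Bytes needed for one Z register at the current vector length. */
static size_t z_reg_bytes(void)
{
	int vl = prctl(PR_SVE_GET_VL);

	if (vl < 0)
		return 0;

	return vl & PR_SVE_VL_LEN_MASK;	/* 16..256 today, not 8192 */
}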
......@@ -75,8 +75,8 @@ int main(void)
DEFINE(S_FP, offsetof(struct pt_regs, regs[29]));
DEFINE(S_LR, offsetof(struct pt_regs, regs[30]));
DEFINE(S_SP, offsetof(struct pt_regs, sp));
DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate));
DEFINE(S_PC, offsetof(struct pt_regs, pc));
DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate));
DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno));
DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1));
DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save));
......
......@@ -128,6 +128,21 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_SVE_B16B16] = "sveb16b16",
[KERNEL_HWCAP_LRCPC3] = "lrcpc3",
[KERNEL_HWCAP_LSE128] = "lse128",
[KERNEL_HWCAP_FPMR] = "fpmr",
[KERNEL_HWCAP_LUT] = "lut",
[KERNEL_HWCAP_FAMINMAX] = "faminmax",
[KERNEL_HWCAP_F8CVT] = "f8cvt",
[KERNEL_HWCAP_F8FMA] = "f8fma",
[KERNEL_HWCAP_F8DP4] = "f8dp4",
[KERNEL_HWCAP_F8DP2] = "f8dp2",
[KERNEL_HWCAP_F8E4M3] = "f8e4m3",
[KERNEL_HWCAP_F8E5M2] = "f8e5m2",
[KERNEL_HWCAP_SME_LUTV2] = "smelutv2",
[KERNEL_HWCAP_SME_F8F16] = "smef8f16",
[KERNEL_HWCAP_SME_F8F32] = "smef8f32",
[KERNEL_HWCAP_SME_SF8FMA] = "smesf8fma",
[KERNEL_HWCAP_SME_SF8DP4] = "smesf8dp4",
[KERNEL_HWCAP_SME_SF8DP2] = "smesf8dp2",
};
#ifdef CONFIG_COMPAT
......@@ -443,14 +458,17 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1);
info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1);
info->reg_id_aa64isar2 = read_cpuid(ID_AA64ISAR2_EL1);
info->reg_id_aa64isar3 = read_cpuid(ID_AA64ISAR3_EL1);
info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1);
info->reg_id_aa64mmfr3 = read_cpuid(ID_AA64MMFR3_EL1);
info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
info->reg_id_aa64pfr2 = read_cpuid(ID_AA64PFR2_EL1);
info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);
info->reg_id_aa64smfr0 = read_cpuid(ID_AA64SMFR0_EL1);
info->reg_id_aa64fpfr0 = read_cpuid(ID_AA64FPFR0_EL1);
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
info->reg_gmid = read_cpuid(GMID_EL1);
......
......@@ -10,6 +10,7 @@
#include <linux/linkage.h>
#include <linux/lockdep.h>
#include <linux/ptrace.h>
#include <linux/resume_user_mode.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/thread_info.h>
......@@ -126,16 +127,49 @@ static __always_inline void __exit_to_user_mode(void)
lockdep_hardirqs_on(CALLER_ADDR0);
}
static void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
{
do {
local_irq_enable();
if (thread_flags & _TIF_NEED_RESCHED)
schedule();
if (thread_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
if (thread_flags & _TIF_MTE_ASYNC_FAULT) {
clear_thread_flag(TIF_MTE_ASYNC_FAULT);
send_sig_fault(SIGSEGV, SEGV_MTEAERR,
(void __user *)NULL, current);
}
if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
do_signal(regs);
if (thread_flags & _TIF_NOTIFY_RESUME)
resume_user_mode_work(regs);
if (thread_flags & _TIF_FOREIGN_FPSTATE)
fpsimd_restore_current_state();
local_irq_disable();
thread_flags = read_thread_flags();
} while (thread_flags & _TIF_WORK_MASK);
}
static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
{
unsigned long flags;
local_daif_mask();
local_irq_disable();
flags = read_thread_flags();
if (unlikely(flags & _TIF_WORK_MASK))
do_notify_resume(regs, flags);
local_daif_mask();
lockdep_sys_exit();
}
......
......@@ -359,6 +359,9 @@ static void task_fpsimd_load(void)
WARN_ON(preemptible());
WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));
if (system_supports_fpmr())
write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR);
if (system_supports_sve() || system_supports_sme()) {
switch (current->thread.fp_type) {
case FP_STATE_FPSIMD:
......@@ -446,6 +449,9 @@ static void fpsimd_save_user_state(void)
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
return;
if (system_supports_fpmr())
*(last->fpmr) = read_sysreg_s(SYS_FPMR);
/*
* If a task is in a syscall the ABI allows us to only
* preserve the state shared with FPSIMD so don't bother
......@@ -688,6 +694,12 @@ static void sve_to_fpsimd(struct task_struct *task)
}
}
void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p)
{
write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK,
SYS_SCTLR_EL1);
}
#ifdef CONFIG_ARM64_SVE
/*
* Call __sve_free() directly only if you know task can't be scheduled
......@@ -1134,6 +1146,8 @@ void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
{
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
isb();
write_sysreg_s(0, SYS_ZCR_EL1);
}
void __init sve_setup(void)
......@@ -1245,6 +1259,9 @@ void cpu_enable_sme(const struct arm64_cpu_capabilities *__always_unused p)
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);
isb();
/* Ensure all bits in SMCR are set to known values */
write_sysreg_s(0, SYS_SMCR_EL1);
/* Allow EL0 to access TPIDR2 */
write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1);
isb();
......@@ -1680,6 +1697,7 @@ static void fpsimd_bind_task_to_cpu(void)
last->sve_vl = task_get_sve_vl(current);
last->sme_vl = task_get_sme_vl(current);
last->svcr = &current->thread.svcr;
last->fpmr = &current->thread.uw.fpmr;
last->fp_type = &current->thread.fp_type;
last->to_save = FP_STATE_CURRENT;
current->thread.fpsimd_cpu = smp_processor_id();
......
......@@ -21,6 +21,7 @@
#include <asm/current.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
#include <asm/hw_breakpoint.h>
#include <asm/traps.h>
#include <asm/cputype.h>
......@@ -779,7 +780,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
* Check that the access type matches.
* 0 => load, otherwise => store
*/
access = (esr & AARCH64_ESR_ACCESS_MASK) ? HW_BREAKPOINT_W :
access = (esr & ESR_ELx_WNR) ? HW_BREAKPOINT_W :
HW_BREAKPOINT_R;
if (!(access & hw_breakpoint_type(wp)))
continue;
......
......@@ -36,6 +36,8 @@ PROVIDE(__pi___memcpy = __pi_memcpy);
PROVIDE(__pi___memmove = __pi_memmove);
PROVIDE(__pi___memset = __pi_memset);
PROVIDE(__pi_vabits_actual = vabits_actual);
#ifdef CONFIG_KVM
/*
......
......@@ -14,6 +14,7 @@
#include <asm/archrandom.h>
#include <asm/memory.h>
#include <asm/pgtable.h>
/* taken from lib/string.c */
static char *__strstr(const char *s1, const char *s2)
......@@ -87,7 +88,7 @@ static u64 get_kaslr_seed(void *fdt)
asmlinkage u64 kaslr_early_init(void *fdt)
{
u64 seed;
u64 seed, range;
if (is_kaslr_disabled_cmdline(fdt))
return 0;
......@@ -102,9 +103,9 @@ asmlinkage u64 kaslr_early_init(void *fdt)
/*
* OK, so we are proceeding with KASLR enabled. Calculate a suitable
* kernel image offset from the seed. Let's place the kernel in the
* middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
* the lower and upper quarters to avoid colliding with other
* allocations.
* 'middle' half of the VMALLOC area, and stay clear of the lower and
* upper quarters to avoid colliding with other allocations.
*/
return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0));
range = (VMALLOC_END - KIMAGE_VADDR) / 2;
return range / 2 + (((__uint128_t)range * seed) >> 64);
}
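
For clarity (an aside, not part of the patch): ((__uint128_t)range * seed) >> 64 is a fixed-point multiply that maps a 64-bit seed uniformly onto [0, range) without modulo bias, and adding range / 2 keeps the resulting offset in the middle half of the 2 * range window between KIMAGE_VADDR and VMALLOC_END:

/* Illustration only: scale a 64-bit seed into [0, range) without bias. */
static unsigned long scale_seed(unsigned long range, u64 seed)
{
	return ((__uint128_t)range * seed) >> 64;
}
/* offset = range / 2 + scale_seed(range, seed)  =>  [range/2, 3*range/2) */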
......@@ -371,6 +371,21 @@ static struct break_hook kprobes_break_ss_hook = {
.fn = kprobe_breakpoint_ss_handler,
};
static int __kprobes
kretprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
{
if (regs->pc != (unsigned long)__kretprobe_trampoline)
return DBG_HOOK_ERROR;
regs->pc = kretprobe_trampoline_handler(regs, (void *)regs->regs[29]);
return DBG_HOOK_HANDLED;
}
static struct break_hook kretprobes_break_hook = {
.imm = KRETPROBES_BRK_IMM,
.fn = kretprobe_breakpoint_handler,
};
/*
* Provide a blacklist of symbols identifying ranges which cannot be kprobed.
* This blacklist is exposed to userspace via debugfs (kprobes/blacklist).
......@@ -396,11 +411,6 @@ int __init arch_populate_kprobe_blacklist(void)
return ret;
}
void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
{
return (void *)kretprobe_trampoline_handler(regs, (void *)regs->regs[29]);
}
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
......@@ -420,6 +430,7 @@ int __init arch_init_kprobes(void)
{
register_kernel_break_hook(&kprobes_break_hook);
register_kernel_break_hook(&kprobes_break_ss_hook);
register_kernel_break_hook(&kretprobes_break_hook);
return 0;
}
......@@ -4,83 +4,17 @@
*/
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/asm-bug.h>
#include <asm/assembler.h>
.text
.macro save_all_base_regs
stp x0, x1, [sp, #S_X0]
stp x2, x3, [sp, #S_X2]
stp x4, x5, [sp, #S_X4]
stp x6, x7, [sp, #S_X6]
stp x8, x9, [sp, #S_X8]
stp x10, x11, [sp, #S_X10]
stp x12, x13, [sp, #S_X12]
stp x14, x15, [sp, #S_X14]
stp x16, x17, [sp, #S_X16]
stp x18, x19, [sp, #S_X18]
stp x20, x21, [sp, #S_X20]
stp x22, x23, [sp, #S_X22]
stp x24, x25, [sp, #S_X24]
stp x26, x27, [sp, #S_X26]
stp x28, x29, [sp, #S_X28]
add x0, sp, #PT_REGS_SIZE
stp lr, x0, [sp, #S_LR]
/*
* Construct a useful saved PSTATE
*/
mrs x0, nzcv
mrs x1, daif
orr x0, x0, x1
mrs x1, CurrentEL
orr x0, x0, x1
mrs x1, SPSel
orr x0, x0, x1
stp xzr, x0, [sp, #S_PC]
.endm
.macro restore_all_base_regs
ldr x0, [sp, #S_PSTATE]
and x0, x0, #(PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT)
msr nzcv, x0
ldp x0, x1, [sp, #S_X0]
ldp x2, x3, [sp, #S_X2]
ldp x4, x5, [sp, #S_X4]
ldp x6, x7, [sp, #S_X6]
ldp x8, x9, [sp, #S_X8]
ldp x10, x11, [sp, #S_X10]
ldp x12, x13, [sp, #S_X12]
ldp x14, x15, [sp, #S_X14]
ldp x16, x17, [sp, #S_X16]
ldp x18, x19, [sp, #S_X18]
ldp x20, x21, [sp, #S_X20]
ldp x22, x23, [sp, #S_X22]
ldp x24, x25, [sp, #S_X24]
ldp x26, x27, [sp, #S_X26]
ldp x28, x29, [sp, #S_X28]
.endm
SYM_CODE_START(__kretprobe_trampoline)
sub sp, sp, #PT_REGS_SIZE
save_all_base_regs
/* Setup a frame pointer. */
add x29, sp, #S_FP
mov x0, sp
bl trampoline_probe_handler
/*
* Replace trampoline address in lr with actual orig_ret_addr return
* address.
* Trigger a breakpoint exception. The PC will be adjusted by
* kretprobe_breakpoint_handler(), and no subsequent instructions will
* be executed from the trampoline.
*/
mov lr, x0
/* The frame pointer (x29) is restored with other registers. */
restore_all_base_regs
add sp, sp, #PT_REGS_SIZE
ret
brk #KRETPROBES_BRK_IMM
ASM_BUG()
SYM_CODE_END(__kretprobe_trampoline)
......@@ -290,9 +290,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
fpsimd_preserve_current_state();
*dst = *src;
/* We rely on the above assignment to initialize dst's thread_flags: */
BUILD_BUG_ON(!IS_ENABLED(CONFIG_THREAD_INFO_IN_TASK));
/*
* Detach src's sve_state (if any) from dst so that it does not
* get erroneously used or freed prematurely. dst's copies
......
......@@ -174,7 +174,6 @@ static void ptrace_hbptriggered(struct perf_event *bp,
struct arch_hw_breakpoint *bkpt = counter_arch_bp(bp);
const char *desc = "Hardware breakpoint trap (ptrace)";
#ifdef CONFIG_COMPAT
if (is_compat_task()) {
int si_errno = 0;
int i;
......@@ -196,7 +195,7 @@ static void ptrace_hbptriggered(struct perf_event *bp,
desc);
return;
}
#endif
arm64_force_sig_fault(SIGTRAP, TRAP_HWBKPT, bkpt->trigger, desc);
}
......@@ -698,6 +697,39 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset,
return ret;
}
static int fpmr_get(struct task_struct *target, const struct user_regset *regset,
struct membuf to)
{
if (!system_supports_fpmr())
return -EINVAL;
if (target == current)
fpsimd_preserve_current_state();
return membuf_store(&to, target->thread.uw.fpmr);
}
static int fpmr_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
int ret;
unsigned long fpmr;
if (!system_supports_fpmr())
return -EINVAL;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpmr, 0, count);
if (ret)
return ret;
target->thread.uw.fpmr = fpmr;
fpsimd_flush_task_state(target);
return 0;
}
static int system_call_get(struct task_struct *target,
const struct user_regset *regset,
struct membuf to)
......@@ -1419,6 +1451,7 @@ enum aarch64_regset {
REGSET_HW_BREAK,
REGSET_HW_WATCH,
#endif
REGSET_FPMR,
REGSET_SYSTEM_CALL,
#ifdef CONFIG_ARM64_SVE
REGSET_SVE,
......@@ -1497,6 +1530,14 @@ static const struct user_regset aarch64_regsets[] = {
.regset_get = system_call_get,
.set = system_call_set,
},
[REGSET_FPMR] = {
.core_note_type = NT_ARM_FPMR,
.n = 1,
.size = sizeof(u64),
.align = sizeof(u64),
.regset_get = fpmr_get,
.set = fpmr_set,
},
#ifdef CONFIG_ARM64_SVE
[REGSET_SVE] = { /* Scalable Vector Extension */
.core_note_type = NT_ARM_SVE,
......@@ -1596,7 +1637,6 @@ static const struct user_regset_view user_aarch64_view = {
.regsets = aarch64_regsets, .n = ARRAY_SIZE(aarch64_regsets)
};
#ifdef CONFIG_COMPAT
enum compat_regset {
REGSET_COMPAT_GPR,
REGSET_COMPAT_VFP,
......@@ -1853,6 +1893,7 @@ static const struct user_regset_view user_aarch32_ptrace_view = {
.regsets = aarch32_ptrace_regsets, .n = ARRAY_SIZE(aarch32_ptrace_regsets)
};
#ifdef CONFIG_COMPAT
static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off,
compat_ulong_t __user *ret)
{
......@@ -2114,7 +2155,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
{
#ifdef CONFIG_COMPAT
/*
* Core dumping of 32-bit tasks or compat ptrace requests must use the
* user_aarch32_view compatible with arm32. Native ptrace requests on
......@@ -2125,7 +2165,7 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
return &user_aarch32_view;
else if (is_compat_thread(task_thread_info(task)))
return &user_aarch32_ptrace_view;
#endif
return &user_aarch64_view;
}
......
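
A hedged sketch of consuming the new regset from a tracer, assuming the target is already ptrace-stopped (the NT_ARM_FPMR fallback definition covers libcs whose elf.h predates this patch):

/* Illustrative tracer-side sketch. */
#include <elf.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>

#ifndef NT_ARM_FPMR
#define NT_ARM_FPMR 0x40e
#endif

static long read_fpmr(pid_t child, unsigned long long *fpmr)
{
	struct iovec iov = {
		.iov_base = fpmr,
		.iov_len = sizeof(*fpmr),
	};

	return ptrace(PTRACE_GETREGSET, child, NT_ARM_FPMR, &iov);
}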
......@@ -320,9 +320,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
dynamic_scs_init();
/*
* Unmask asynchronous aborts and fiq after bringing up possible
* earlycon. (Report possible System Errors once we can report this
* occurred).
* Unmask SError as soon as possible after initializing earlycon so
* that we can report any SErrors immediately.
*/
local_daif_restore(DAIF_PROCCTX_NOIRQ);
......
......@@ -16,8 +16,8 @@
#include <linux/uaccess.h>
#include <linux/sizes.h>
#include <linux/string.h>
#include <linux/resume_user_mode.h>
#include <linux/ratelimit.h>
#include <linux/rseq.h>
#include <linux/syscalls.h>
#include <asm/daifflags.h>
......@@ -60,6 +60,7 @@ struct rt_sigframe_user_layout {
unsigned long tpidr2_offset;
unsigned long za_offset;
unsigned long zt_offset;
unsigned long fpmr_offset;
unsigned long extra_offset;
unsigned long end_offset;
};
......@@ -182,6 +183,8 @@ struct user_ctxs {
u32 za_size;
struct zt_context __user *zt;
u32 zt_size;
struct fpmr_context __user *fpmr;
u32 fpmr_size;
};
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
......@@ -227,6 +230,33 @@ static int restore_fpsimd_context(struct user_ctxs *user)
return err ? -EFAULT : 0;
}
static int preserve_fpmr_context(struct fpmr_context __user *ctx)
{
int err = 0;
current->thread.uw.fpmr = read_sysreg_s(SYS_FPMR);
__put_user_error(FPMR_MAGIC, &ctx->head.magic, err);
__put_user_error(sizeof(*ctx), &ctx->head.size, err);
__put_user_error(current->thread.uw.fpmr, &ctx->fpmr, err);
return err;
}
static int restore_fpmr_context(struct user_ctxs *user)
{
u64 fpmr;
int err = 0;
if (user->fpmr_size != sizeof(*user->fpmr))
return -EINVAL;
__get_user_error(fpmr, &user->fpmr->fpmr, err);
if (!err)
write_sysreg_s(fpmr, SYS_FPMR);
return err;
}
#ifdef CONFIG_ARM64_SVE
......@@ -590,6 +620,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->tpidr2 = NULL;
user->za = NULL;
user->zt = NULL;
user->fpmr = NULL;
if (!IS_ALIGNED((unsigned long)base, 16))
goto invalid;
......@@ -684,6 +715,17 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->zt_size = size;
break;
case FPMR_MAGIC:
if (!system_supports_fpmr())
goto invalid;
if (user->fpmr)
goto invalid;
user->fpmr = (struct fpmr_context __user *)head;
user->fpmr_size = size;
break;
case EXTRA_MAGIC:
if (have_extra_context)
goto invalid;
......@@ -806,6 +848,9 @@ static int restore_sigframe(struct pt_regs *regs,
if (err == 0 && system_supports_tpidr2() && user.tpidr2)
err = restore_tpidr2_context(&user);
if (err == 0 && system_supports_fpmr() && user.fpmr)
err = restore_fpmr_context(&user);
if (err == 0 && system_supports_sme() && user.za)
err = restore_za_context(&user);
......@@ -928,6 +973,13 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
}
}
if (system_supports_fpmr()) {
err = sigframe_alloc(user, &user->fpmr_offset,
sizeof(struct fpmr_context));
if (err)
return err;
}
return sigframe_alloc_end(user);
}
......@@ -983,6 +1035,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
err |= preserve_tpidr2_context(tpidr2_ctx);
}
/* FPMR if supported */
if (system_supports_fpmr() && err == 0) {
struct fpmr_context __user *fpmr_ctx =
apply_user_offset(user, user->fpmr_offset);
err |= preserve_fpmr_context(fpmr_ctx);
}
/* ZA state if present */
if (system_supports_sme() && err == 0 && user->za_offset) {
struct za_context __user *za_ctx =
......@@ -1207,7 +1266,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
* the kernel can handle, and then we build all the user-level signal handling
* stack-frames in one go after that.
*/
static void do_signal(struct pt_regs *regs)
void do_signal(struct pt_regs *regs)
{
unsigned long continue_addr = 0, restart_addr = 0;
int retval = 0;
......@@ -1278,41 +1337,6 @@ static void do_signal(struct pt_regs *regs)
restore_saved_sigmask();
}
void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
{
do {
if (thread_flags & _TIF_NEED_RESCHED) {
/* Unmask Debug and SError for the next task */
local_daif_restore(DAIF_PROCCTX_NOIRQ);
schedule();
} else {
local_daif_restore(DAIF_PROCCTX);
if (thread_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
if (thread_flags & _TIF_MTE_ASYNC_FAULT) {
clear_thread_flag(TIF_MTE_ASYNC_FAULT);
send_sig_fault(SIGSEGV, SEGV_MTEAERR,
(void __user *)NULL, current);
}
if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
do_signal(regs);
if (thread_flags & _TIF_NOTIFY_RESUME)
resume_user_mode_work(regs);
if (thread_flags & _TIF_FOREIGN_FPSTATE)
fpsimd_restore_current_state();
}
local_daif_mask();
thread_flags = read_thread_flags();
} while (thread_flags & _TIF_WORK_MASK);
}
unsigned long __ro_after_init signal_minsigstksz;
/*
......
......@@ -20,14 +20,11 @@ long sys_ni_syscall(void);
static long do_ni_syscall(struct pt_regs *regs, int scno)
{
#ifdef CONFIG_COMPAT
long ret;
if (is_compat_task()) {
ret = compat_arm_syscall(regs, scno);
long ret = compat_arm_syscall(regs, scno);
if (ret != -ENOSYS)
return ret;
}
#endif
return sys_ni_syscall();
}
......
......@@ -153,6 +153,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
fp_state.sve_vl = vcpu->arch.sve_max_vl;
fp_state.sme_state = NULL;
fp_state.svcr = &vcpu->arch.svcr;
fp_state.fpmr = &vcpu->arch.fpmr;
fp_state.fp_type = &vcpu->arch.fp_type;
if (vcpu_has_sve(vcpu))
......
......@@ -16,6 +16,9 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
/* ensure that the fixmap region does not grow down into the PCI I/O region */
static_assert(FIXADDR_TOT_START > PCI_IO_END);
#define NR_BM_PTE_TABLES \
SPAN_NR_ENTRIES(FIXADDR_TOT_START, FIXADDR_TOP, PMD_SHIFT)
#define NR_BM_PMD_TABLES \
......
......@@ -26,34 +26,6 @@
#include <asm/ptdump.h>
enum address_markers_idx {
PAGE_OFFSET_NR = 0,
PAGE_END_NR,
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
KASAN_START_NR,
#endif
};
static struct addr_marker address_markers[] = {
{ PAGE_OFFSET, "Linear Mapping start" },
{ 0 /* PAGE_END */, "Linear Mapping end" },
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
{ 0 /* KASAN_SHADOW_START */, "Kasan shadow start" },
{ KASAN_SHADOW_END, "Kasan shadow end" },
#endif
{ MODULES_VADDR, "Modules start" },
{ MODULES_END, "Modules end" },
{ VMALLOC_START, "vmalloc() area" },
{ VMALLOC_END, "vmalloc() end" },
{ FIXADDR_TOT_START, "Fixmap start" },
{ FIXADDR_TOP, "Fixmap end" },
{ PCI_IO_START, "PCI I/O start" },
{ PCI_IO_END, "PCI I/O end" },
{ VMEMMAP_START, "vmemmap start" },
{ VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" },
{ -1, NULL },
};
#define pt_dump_seq_printf(m, fmt, args...) \
({ \
if (m) \
......@@ -339,9 +311,8 @@ static void __init ptdump_initialize(void)
pg_level[i].mask |= pg_level[i].bits[j].mask;
}
static struct ptdump_info kernel_ptdump_info = {
static struct ptdump_info kernel_ptdump_info __ro_after_init = {
.mm = &init_mm,
.markers = address_markers,
.base_addr = PAGE_OFFSET,
};
......@@ -375,10 +346,31 @@ void ptdump_check_wx(void)
static int __init ptdump_init(void)
{
address_markers[PAGE_END_NR].start_address = PAGE_END;
u64 page_offset = _PAGE_OFFSET(vabits_actual);
u64 vmemmap_start = (u64)virt_to_page((void *)page_offset);
struct addr_marker m[] = {
{ PAGE_OFFSET, "Linear Mapping start" },
{ PAGE_END, "Linear Mapping end" },
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
address_markers[KASAN_START_NR].start_address = KASAN_SHADOW_START;
{ KASAN_SHADOW_START, "Kasan shadow start" },
{ KASAN_SHADOW_END, "Kasan shadow end" },
#endif
{ MODULES_VADDR, "Modules start" },
{ MODULES_END, "Modules end" },
{ VMALLOC_START, "vmalloc() area" },
{ VMALLOC_END, "vmalloc() end" },
{ vmemmap_start, "vmemmap start" },
{ VMEMMAP_END, "vmemmap end" },
{ PCI_IO_START, "PCI I/O start" },
{ PCI_IO_END, "PCI I/O end" },
{ FIXADDR_TOT_START, "Fixmap start" },
{ FIXADDR_TOP, "Fixmap end" },
{ -1, NULL },
};
static struct addr_marker address_markers[ARRAY_SIZE(m)] __ro_after_init;
kernel_ptdump_info.markers = memcpy(address_markers, m, sizeof(m));
ptdump_initialize();
ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables");
return 0;
......
......@@ -26,6 +26,7 @@ HAS_ECV
HAS_ECV_CNTPOFF
HAS_EPAN
HAS_EVT
HAS_FPMR
HAS_FGT
HAS_FPSIMD
HAS_GENERIC_AUTH
......
......@@ -200,6 +200,7 @@ UnsignedEnum 27:24 PerfMon
0b0110 PMUv3p5
0b0111 PMUv3p7
0b1000 PMUv3p8
0b1001 PMUv3p9
0b1111 IMPDEF
EndEnum
Enum 23:20 MProfDbg
......@@ -231,6 +232,7 @@ Enum 3:0 CopDbg
0b1000 Debugv8p2
0b1001 Debugv8p4
0b1010 Debugv8p8
0b1011 Debugv8p9
EndEnum
EndSysreg
......@@ -1221,6 +1223,7 @@ UnsignedEnum 35:32 PMSVer
0b0010 V1P1
0b0011 V1P2
0b0100 V1P3
0b0101 V1P4
EndEnum
Field 31:28 CTX_CMPs
Res0 27:24
......@@ -1247,11 +1250,41 @@ UnsignedEnum 3:0 DebugVer
0b1000 V8P2
0b1001 V8P4
0b1010 V8P8
0b1011 V8P9
EndEnum
EndSysreg
Sysreg ID_AA64DFR1_EL1 3 0 0 5 1
Res0 63:0
Field 63:56 ABL_CMPs
UnsignedEnum 55:52 DPFZS
0b0000 IGNR
0b0001 FRZN
EndEnum
UnsignedEnum 51:48 EBEP
0b0000 NI
0b0001 IMP
EndEnum
UnsignedEnum 47:44 ITE
0b0000 NI
0b0001 IMP
EndEnum
UnsignedEnum 43:40 ABLE
0b0000 NI
0b0001 IMP
EndEnum
UnsignedEnum 39:36 PMICNTR
0b0000 NI
0b0001 IMP
EndEnum
UnsignedEnum 35:32 SPMU
0b0000 NI
0b0001 IMP
0b0010 IMP_SPMZR
EndEnum
Field 31:24 CTX_CMPs
Field 23:16 WRPs
Field 15:8 BRPs
Field 7:0 SYSPMUID
EndSysreg
Sysreg ID_AA64AFR0_EL1 3 0 0 5 4
......
......@@ -82,6 +82,7 @@ KBUILD_AFLAGS_MODULE += -Wa,-mla-global-with-abs
KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
endif
KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
KBUILD_RUSTFLAGS_MODULE += -Crelocation-model=pic
ifeq ($(CONFIG_RELOCATABLE),y)
......
......@@ -68,6 +68,7 @@ export BITS
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
#
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
ifeq ($(CONFIG_X86_KERNEL_IBT),y)
......
......@@ -440,6 +440,7 @@ typedef struct elf64_shdr {
#define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */
#define NT_ARM_ZA 0x40c /* ARM SME ZA registers */
#define NT_ARM_ZT 0x40d /* ARM SME ZT registers */
#define NT_ARM_FPMR 0x40e /* ARM floating point mode register */
#define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */
#define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */
#define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */
......
......@@ -297,6 +297,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \
# Derived from `scripts/Makefile.clang`.
BINDGEN_TARGET_x86 := x86_64-linux-gnu
BINDGEN_TARGET_arm64 := aarch64-linux-gnu
BINDGEN_TARGET := $(BINDGEN_TARGET_$(SRCARCH))
# All warnings are inhibited since GCC builds are very experimental,
......@@ -434,8 +435,11 @@ $(obj)/core.o: private skip_clippy = 1
$(obj)/core.o: private skip_flags = -Dunreachable_pub
$(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--redefine-sym $(sym)=__rust$(sym))
$(obj)/core.o: private rustc_target_flags = $(core-cfgs)
$(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs scripts/target.json FORCE
$(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs FORCE
$(call if_changed_dep,rustc_library)
ifneq ($(or $(CONFIG_X86_64),$(CONFIG_LOONGARCH)),)
$(obj)/core.o: scripts/target.json
endif
$(obj)/compiler_builtins.o: private rustc_objcopy = -w -W '__*'
$(obj)/compiler_builtins.o: $(src)/compiler_builtins.rs $(obj)/core.o FORCE
......
......@@ -11,12 +11,14 @@ hostprogs-always-$(CONFIG_MODULE_SIG_FORMAT) += sign-file
hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert
hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS) += rustdoc_test_builder
hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS) += rustdoc_test_gen
always-$(CONFIG_RUST) += target.json
ifneq ($(or $(CONFIG_X86_64),$(CONFIG_LOONGARCH)),)
always-$(CONFIG_RUST) += target.json
filechk_rust_target = $< < include/config/auto.conf
$(obj)/target.json: scripts/generate_rust_target include/config/auto.conf FORCE
$(call filechk,rust_target)
endif
hostprogs += generate_rust_target
generate_rust_target-rust := y
......
......@@ -148,7 +148,9 @@ fn main() {
let mut ts = TargetSpec::new();
// `llvm-target`s are taken from `scripts/Makefile.clang`.
if cfg.has("X86_64") {
if cfg.has("ARM64") {
panic!("arm64 uses the builtin rustc aarch64-unknown-none target");
} else if cfg.has("X86_64") {
ts.push("arch", "x86_64");
ts.push(
"data-layout",
......
......@@ -58,11 +58,46 @@ static void cssc_sigill(void)
asm volatile(".inst 0xdac01c00" : : : "x0");
}
static void f8cvt_sigill(void)
{
/* FSCALE V0.4H, V0.4H, V0.4H */
asm volatile(".inst 0x2ec03c00");
}
static void f8dp2_sigill(void)
{
/* FDOT V0.4H, V0.4H, V0.5H */
asm volatile(".inst 0xe40fc00");
}
static void f8dp4_sigill(void)
{
/* FDOT V0.2S, V0.2S, V0.2S */
asm volatile(".inst 0xe00fc00");
}
static void f8fma_sigill(void)
{
/* FMLALB V0.8H, V0.16B, V0.16B */
asm volatile(".inst 0xec0fc00");
}
static void faminmax_sigill(void)
{
/* FAMIN V0.4H, V0.4H, V0.4H */
asm volatile(".inst 0x2ec01c00");
}
static void fp_sigill(void)
{
asm volatile("fmov s0, #1");
}
static void fpmr_sigill(void)
{
asm volatile("mrs x0, S3_3_C4_C4_2" : : : "x0");
}
static void ilrcpc_sigill(void)
{
/* LDAPUR W0, [SP, #8] */
......@@ -95,6 +130,12 @@ static void lse128_sigill(void)
: "cc", "memory");
}
static void lut_sigill(void)
{
/* LUTI2 V0.16B, { V0.16B }, V[0] */
asm volatile(".inst 0x4e801000");
}
static void mops_sigill(void)
{
char dst[1], src[1];
......@@ -216,6 +257,78 @@ static void smef16f16_sigill(void)
asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
}
static void smef8f16_sigill(void)
{
/* SMSTART */
asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
/* FDOT ZA.H[W0, 0], Z0.B-Z1.B, Z0.B-Z1.B */
asm volatile(".inst 0xc1a01020" : : : );
/* SMSTOP */
asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
}
static void smef8f32_sigill(void)
{
/* SMSTART */
asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
/* FDOT ZA.S[W0, 0], { Z0.B-Z1.B }, Z0.B[0] */
asm volatile(".inst 0xc1500038" : : : );
/* SMSTOP */
asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
}
static void smelutv2_sigill(void)
{
/* SMSTART */
asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
/* LUTI4 { Z0.B-Z3.B }, ZT0, { Z0-Z1 } */
asm volatile(".inst 0xc08b0000" : : : );
/* SMSTOP */
asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
}
static void smesf8dp2_sigill(void)
{
/* SMSTART */
asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
/* FDOT Z0.H, Z0.B, Z0.B[0] */
asm volatile(".inst 0x64204400" : : : );
/* SMSTOP */
asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
}
static void smesf8dp4_sigill(void)
{
/* SMSTART */
asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
/* FDOT Z0.S, Z0.B, Z0.B[0] */
asm volatile(".inst 0xc1a41C00" : : : );
/* SMSTOP */
asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
}
static void smesf8fma_sigill(void)
{
/* SMSTART */
asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
/* FMLALB V0.8H, V0.16B, V0.16B */
asm volatile(".inst 0xec0fc00");
/* SMSTOP */
asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
}
static void sve_sigill(void)
{
/* RDVL x0, #0 */
......@@ -353,6 +466,53 @@ static const struct hwcap_data {
.cpuinfo = "cssc",
.sigill_fn = cssc_sigill,
},
{
.name = "F8CVT",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_F8CVT,
.cpuinfo = "f8cvt",
.sigill_fn = f8cvt_sigill,
},
{
.name = "F8DP4",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_F8DP4,
.cpuinfo = "f8dp4",
.sigill_fn = f8dp4_sigill,
},
{
.name = "F8DP2",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_F8DP2,
.cpuinfo = "f8dp4",
.sigill_fn = f8dp2_sigill,
},
{
.name = "F8E5M2",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_F8E5M2,
.cpuinfo = "f8e5m2",
},
{
.name = "F8E4M3",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_F8E4M3,
.cpuinfo = "f8e4m3",
},
{
.name = "F8FMA",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_F8FMA,
.cpuinfo = "f8fma",
.sigill_fn = f8fma_sigill,
},
{
.name = "FAMINMAX",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_FAMINMAX,
.cpuinfo = "faminmax",
.sigill_fn = faminmax_sigill,
},
{
.name = "FP",
.at_hwcap = AT_HWCAP,
......@@ -360,6 +520,14 @@ static const struct hwcap_data {
.cpuinfo = "fp",
.sigill_fn = fp_sigill,
},
{
.name = "FPMR",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_FPMR,
.cpuinfo = "fpmr",
.sigill_fn = fpmr_sigill,
.sigill_reliable = true,
},
{
.name = "JSCVT",
.at_hwcap = AT_HWCAP,
......@@ -411,6 +579,13 @@ static const struct hwcap_data {
.cpuinfo = "lse128",
.sigill_fn = lse128_sigill,
},
{
.name = "LUT",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_LUT,
.cpuinfo = "lut",
.sigill_fn = lut_sigill,
},
{
.name = "MOPS",
.at_hwcap = AT_HWCAP2,
@@ -511,6 +686,48 @@ static const struct hwcap_data {
.cpuinfo = "smef16f16",
.sigill_fn = smef16f16_sigill,
},
{
.name = "SME F8F16",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_SME_F8F16,
.cpuinfo = "smef8f16",
.sigill_fn = smef8f16_sigill,
},
{
.name = "SME F8F32",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_SME_F8F32,
.cpuinfo = "smef8f32",
.sigill_fn = smef8f32_sigill,
},
{
.name = "SME LUTV2",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_SME_LUTV2,
.cpuinfo = "smelutv2",
.sigill_fn = smelutv2_sigill,
},
{
.name = "SME SF8FMA",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_SME_SF8FMA,
.cpuinfo = "smesf8fma",
.sigill_fn = smesf8fma_sigill,
},
{
.name = "SME SF8DP2",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_SME_SF8DP2,
.cpuinfo = "smesf8dp2",
.sigill_fn = smesf8dp2_sigill,
},
{
.name = "SME SF8DP4",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_SME_SF8DP4,
.cpuinfo = "smesf8dp4",
.sigill_fn = smesf8dp4_sigill,
},
{
.name = "SVE",
.at_hwcap = AT_HWCAP,
......
fp-pidbench
fp-ptrace
fp-stress
fpsimd-test
rdvl-sme
......
@@ -5,7 +5,9 @@ top_srcdir = $(realpath ../../../../../)
CFLAGS += $(KHDR_INCLUDES)
TEST_GEN_PROGS := \
fp-ptrace \
fp-stress \
sve-ptrace sve-probe-vls \
vec-syscfg \
za-fork za-ptrace
@@ -24,6 +26,7 @@ EXTRA_CLEAN += $(OUTPUT)/asm-utils.o $(OUTPUT)/rdvl.o $(OUTPUT)/za-fork-asm.o
# Build with nolibc to avoid effects due to libc's clone() support
$(OUTPUT)/fp-pidbench: fp-pidbench.S $(OUTPUT)/asm-utils.o
$(CC) -nostdlib $^ -o $@
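# fp-ptrace is built from the C harness together with its assembly helper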
$(OUTPUT)/fp-ptrace: fp-ptrace.c fp-ptrace-asm.S
$(OUTPUT)/fpsimd-test: fpsimd-test.S $(OUTPUT)/asm-utils.o
$(CC) -nostdlib $^ -o $@
$(OUTPUT)/rdvl-sve: rdvl-sve.c $(OUTPUT)/rdvl.o
......
// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2021-3 ARM Limited.
//
// Assembly portion of the FP ptrace test
//
// Load values from memory into registers, stop at a breakpoint for the
// parent, then save the registers back to memory and stop at a further
// breakpoint
//
#include "fp-ptrace.h"
#include "sme-inst.h"
.arch_extension sve
// Load and save register values with pauses for ptrace
//
// x0 - SVE in use
// x1 - SME in use
// x2 - SME2 in use
// x3 - FA64 supported
.globl load_and_save
load_and_save:
stp x11, x12, [sp, #-0x10]!
// This should be redundant in the SVE case
ldr x7, =v_in
ldp q0, q1, [x7]
ldp q2, q3, [x7, #16 * 2]
ldp q4, q5, [x7, #16 * 4]
ldp q6, q7, [x7, #16 * 6]
ldp q8, q9, [x7, #16 * 8]
ldp q10, q11, [x7, #16 * 10]
ldp q12, q13, [x7, #16 * 12]
ldp q14, q15, [x7, #16 * 14]
ldp q16, q17, [x7, #16 * 16]
ldp q18, q19, [x7, #16 * 18]
ldp q20, q21, [x7, #16 * 20]
ldp q22, q23, [x7, #16 * 22]
ldp q24, q25, [x7, #16 * 24]
ldp q26, q27, [x7, #16 * 26]
ldp q28, q29, [x7, #16 * 28]
ldp q30, q31, [x7, #16 * 30]
// SME?
cbz x1, check_sve_in
adrp x7, svcr_in
ldr x7, [x7, :lo12:svcr_in]
// SVCR is 0 by default, avoid triggering SME if not in use
cbz x7, check_sve_in
msr S3_3_C4_C2_2, x7
// ZA?
tbz x7, #SVCR_ZA_SHIFT, check_sm_in
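// ZA has SVL_B rows of SVL_B bytes; read SVL_B and load one row per iteration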
rdsvl 11, 1
mov w12, #0
ldr x6, =za_in
1: _ldr_za 12, 6
add x6, x6, x11
add x12, x12, #1
cmp x11, x12
bne 1b
// ZT?
cbz x2, check_sm_in
adrp x6, zt_in
add x6, x6, :lo12:zt_in
_ldr_zt 6
// In streaming mode?
check_sm_in:
tbz x7, #SVCR_SM_SHIFT, check_sve_in
mov x4, x3 // Load FFR if we have FA64
b load_sve
// SVE?
check_sve_in:
cbz x0, wait_for_writes
mov x4, #1
load_sve:
ldr x7, =z_in
ldr z0, [x7, #0, MUL VL]
ldr z1, [x7, #1, MUL VL]
ldr z2, [x7, #2, MUL VL]
ldr z3, [x7, #3, MUL VL]
ldr z4, [x7, #4, MUL VL]
ldr z5, [x7, #5, MUL VL]
ldr z6, [x7, #6, MUL VL]
ldr z7, [x7, #7, MUL VL]
ldr z8, [x7, #8, MUL VL]
ldr z9, [x7, #9, MUL VL]
ldr z10, [x7, #10, MUL VL]
ldr z11, [x7, #11, MUL VL]
ldr z12, [x7, #12, MUL VL]
ldr z13, [x7, #13, MUL VL]
ldr z14, [x7, #14, MUL VL]
ldr z15, [x7, #15, MUL VL]
ldr z16, [x7, #16, MUL VL]
ldr z17, [x7, #17, MUL VL]
ldr z18, [x7, #18, MUL VL]
ldr z19, [x7, #19, MUL VL]
ldr z20, [x7, #20, MUL VL]
ldr z21, [x7, #21, MUL VL]
ldr z22, [x7, #22, MUL VL]
ldr z23, [x7, #23, MUL VL]
ldr z24, [x7, #24, MUL VL]
ldr z25, [x7, #25, MUL VL]
ldr z26, [x7, #26, MUL VL]
ldr z27, [x7, #27, MUL VL]
ldr z28, [x7, #28, MUL VL]
ldr z29, [x7, #29, MUL VL]
ldr z30, [x7, #30, MUL VL]
ldr z31, [x7, #31, MUL VL]
// FFR is not present in base SME
cbz x4, 1f
ldr x7, =ffr_in
ldr p0, [x7]
ldr x7, [x7, #0]
cbz x7, 1f
wrffr p0.b
1:
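// Load the predicate registers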
ldr x7, =p_in
ldr p0, [x7, #0, MUL VL]
ldr p1, [x7, #1, MUL VL]
ldr p2, [x7, #2, MUL VL]
ldr p3, [x7, #3, MUL VL]
ldr p4, [x7, #4, MUL VL]
ldr p5, [x7, #5, MUL VL]
ldr p6, [x7, #6, MUL VL]
ldr p7, [x7, #7, MUL VL]
ldr p8, [x7, #8, MUL VL]
ldr p9, [x7, #9, MUL VL]
ldr p10, [x7, #10, MUL VL]
ldr p11, [x7, #11, MUL VL]
ldr p12, [x7, #12, MUL VL]
ldr p13, [x7, #13, MUL VL]
ldr p14, [x7, #14, MUL VL]
ldr p15, [x7, #15, MUL VL]
wait_for_writes:
// Wait for the parent
brk #0
// Save values
ldr x7, =v_out
stp q0, q1, [x7]
stp q2, q3, [x7, #16 * 2]
stp q4, q5, [x7, #16 * 4]
stp q6, q7, [x7, #16 * 6]
stp q8, q9, [x7, #16 * 8]
stp q10, q11, [x7, #16 * 10]
stp q12, q13, [x7, #16 * 12]
stp q14, q15, [x7, #16 * 14]
stp q16, q17, [x7, #16 * 16]
stp q18, q19, [x7, #16 * 18]
stp q20, q21, [x7, #16 * 20]
stp q22, q23, [x7, #16 * 22]
stp q24, q25, [x7, #16 * 24]
stp q26, q27, [x7, #16 * 26]
stp q28, q29, [x7, #16 * 28]
stp q30, q31, [x7, #16 * 30]
// SME?
cbz x1, check_sve_out
rdsvl 11, 1
adrp x6, sme_vl_out
str x11, [x6, :lo12:sme_vl_out]
mrs x7, S3_3_C4_C2_2
adrp x6, svcr_out
str x7, [x6, :lo12:svcr_out]
// ZA?
tbz x7, #SVCR_ZA_SHIFT, check_sm_out
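// Save ZA row by row; x11 holds SVL_B (the row count and row size in bytes)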
mov w12, #0
ldr x6, =za_out
1: _str_za 12, 6
add x6, x6, x11
add x12, x12, #1
cmp x11, x12
bne 1b
// ZT?
cbz x2, check_sm_out
adrp x6, zt_out
add x6, x6, :lo12:zt_out
_str_zt 6
// In streaming mode?
check_sm_out:
tbz x7, #SVCR_SM_SHIFT, check_sve_out
mov x4, x3 // FFR?
b read_sve
// SVE?
check_sve_out:
cbz x0, wait_for_reads
mov x4, #1
rdvl x7, #1
adrp x6, sve_vl_out
str x7, [x6, :lo12:sve_vl_out]
read_sve:
ldr x7, =z_out
str z0, [x7, #0, MUL VL]
str z1, [x7, #1, MUL VL]
str z2, [x7, #2, MUL VL]
str z3, [x7, #3, MUL VL]
str z4, [x7, #4, MUL VL]
str z5, [x7, #5, MUL VL]
str z6, [x7, #6, MUL VL]
str z7, [x7, #7, MUL VL]
str z8, [x7, #8, MUL VL]
str z9, [x7, #9, MUL VL]
str z10, [x7, #10, MUL VL]
str z11, [x7, #11, MUL VL]
str z12, [x7, #12, MUL VL]
str z13, [x7, #13, MUL VL]
str z14, [x7, #14, MUL VL]
str z15, [x7, #15, MUL VL]
str z16, [x7, #16, MUL VL]
str z17, [x7, #17, MUL VL]
str z18, [x7, #18, MUL VL]
str z19, [x7, #19, MUL VL]
str z20, [x7, #20, MUL VL]
str z21, [x7, #21, MUL VL]
str z22, [x7, #22, MUL VL]
str z23, [x7, #23, MUL VL]
str z24, [x7, #24, MUL VL]
str z25, [x7, #25, MUL VL]
str z26, [x7, #26, MUL VL]
str z27, [x7, #27, MUL VL]
str z28, [x7, #28, MUL VL]
str z29, [x7, #29, MUL VL]
str z30, [x7, #30, MUL VL]
str z31, [x7, #31, MUL VL]
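// Save the predicate registers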
ldr x7, =p_out
str p0, [x7, #0, MUL VL]
str p1, [x7, #1, MUL VL]
str p2, [x7, #2, MUL VL]
str p3, [x7, #3, MUL VL]
str p4, [x7, #4, MUL VL]
str p5, [x7, #5, MUL VL]
str p6, [x7, #6, MUL VL]
str p7, [x7, #7, MUL VL]
str p8, [x7, #8, MUL VL]
str p9, [x7, #9, MUL VL]
str p10, [x7, #10, MUL VL]
str p11, [x7, #11, MUL VL]
str p12, [x7, #12, MUL VL]
str p13, [x7, #13, MUL VL]
str p14, [x7, #14, MUL VL]
str p15, [x7, #15, MUL VL]
// Only save FFR if it exists
cbz x4, wait_for_reads
ldr x7, =ffr_out
rdffr p0.b
str p0, [x7]
wait_for_reads:
// Wait for the parent
brk #0
// Ensure we don't leave ourselves in streaming mode
cbz x1, out
msr S3_3_C4_C2_2, xzr
out:
ldp x11, x12, [sp], #0x10
ret
// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2021-3 ARM Limited.
#ifndef FP_PTRACE_H
#define FP_PTRACE_H
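// Bit positions of the SM and ZA enable bits in SVCR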
#define SVCR_SM_SHIFT 0
#define SVCR_ZA_SHIFT 1
#define SVCR_SM (1 << SVCR_SM_SHIFT)
#define SVCR_ZA (1 << SVCR_ZA_SHIFT)
#endif
# SPDX-License-Identifier: GPL-2.0-only
mangle_*
fake_sigreturn_*
fpmr_*
sme_*
ssve_*
sve_*
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2023 ARM Limited
*
* Verify that the FPMR register context in signal frames is set up as
* expected.
*/
#include <signal.h>
#include <ucontext.h>
#include <sys/auxv.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <asm/sigcontext.h>
#include "test_signals_utils.h"
#include "testcases.h"
static union {
ucontext_t uc;
char buf[1024 * 128];
} context;
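/* FPMR referenced by its encoding so assemblers that lack the name can still build this */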
#define SYS_FPMR "S3_3_C4_C4_2"
static uint64_t get_fpmr(void)
{
uint64_t val;
asm volatile (
"mrs %0, " SYS_FPMR "\n"
: "=r"(val)
:
: "cc");
return val;
}
int fpmr_present(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
{
struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
struct fpmr_context *fpmr_ctx;
size_t offset;
bool in_sigframe;
bool have_fpmr;
__u64 orig_fpmr;
have_fpmr = getauxval(AT_HWCAP2) & HWCAP2_FPMR;
if (have_fpmr)
orig_fpmr = get_fpmr();
if (!get_current_context(td, &context.uc, sizeof(context)))
return 1;
fpmr_ctx = (struct fpmr_context *)
get_header(head, FPMR_MAGIC, td->live_sz, &offset);
in_sigframe = fpmr_ctx != NULL;
fprintf(stderr, "FPMR sigframe %s on system %s FPMR\n",
in_sigframe ? "present" : "absent",
have_fpmr ? "with" : "without");
td->pass = (in_sigframe == have_fpmr);
if (have_fpmr && fpmr_ctx) {
if (fpmr_ctx->fpmr != orig_fpmr) {
fprintf(stderr, "FPMR in frame is %llx, was %llx\n",
fpmr_ctx->fpmr, orig_fpmr);
td->pass = false;
}
}
return 0;
}
struct tdescr tde = {
.name = "FPMR",
.descr = "Validate that FPMR is present as expected",
.timeout = 3,
.run = fpmr_present,
};
@@ -209,6 +209,14 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
zt = (struct zt_context *)head;
new_flags |= ZT_CTX;
break;
case FPMR_MAGIC:
if (flags & FPMR_CTX)
*err = "Multiple FPMR_MAGIC";
else if (head->size !=
sizeof(struct fpmr_context))
*err = "Bad size for fpmr_context";
new_flags |= FPMR_CTX;
break;
case EXTRA_MAGIC:
if (flags & EXTRA_CTX)
*err = "Multiple EXTRA_MAGIC";
......
@@ -19,6 +19,7 @@
#define ZA_CTX (1 << 2)
#define EXTRA_CTX (1 << 3)
#define ZT_CTX (1 << 4)
#define FPMR_CTX (1 << 5)
#define KSFT_BAD_MAGIC 0xdeadbeef
......