Commit 12952b6b authored by Linus Torvalds

Merge tag 'loongarch-6.6' of...

Merge tag 'loongarch-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson

Pull LoongArch updates from Huacai Chen:

 - Allow usage of LSX/LASX in the kernel, and use them for
   SIMD-optimized RAID5/RAID6 routines

 - Add Loongson Binary Translation (LBT) extension support

 - Add basic KGDB & KDB support

 - Add building with kcov coverage

 - Add KFENCE (Kernel Electric-Fence) support

 - Add KASAN (Kernel Address Sanitizer) support

 - Some bug fixes and other small changes

 - Update the default config file

* tag 'loongarch-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson: (25 commits)
  LoongArch: Update Loongson-3 default config file
  LoongArch: Add KASAN (Kernel Address Sanitizer) support
  LoongArch: Simplify the processing of jumping new kernel for KASLR
  kasan: Add (pmd|pud)_init for LoongArch zero_(pud|p4d)_populate process
  kasan: Add __HAVE_ARCH_SHADOW_MAP to support arch specific mapping
  LoongArch: Add KFENCE (Kernel Electric-Fence) support
  LoongArch: Get partial stack information when providing regs parameter
  LoongArch: mm: Add page table mapped mode support for virt_to_page()
  kfence: Defer the assignment of the local variable addr
  LoongArch: Allow building with kcov coverage
  LoongArch: Provide kaslr_offset() to get kernel offset
  LoongArch: Add basic KGDB & KDB support
  LoongArch: Add Loongson Binary Translation (LBT) extension support
  raid6: Add LoongArch SIMD recovery implementation
  raid6: Add LoongArch SIMD syndrome calculation
  LoongArch: Add SIMD-optimized XOR routines
  LoongArch: Allow usage of LSX/LASX in the kernel
  LoongArch: Define symbol 'fault' as a local label in fpu.S
  LoongArch: Adjust {copy, clear}_user exception handler behavior
  LoongArch: Use static defined zero page rather than allocated
  ...
parents 01a46efc 671eae93
......@@ -41,8 +41,8 @@ Support
Architectures
~~~~~~~~~~~~~
Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, and
xtensa, and the tag-based KASAN modes are supported only on arm64.
Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, xtensa,
and loongarch, and the tag-based KASAN modes are supported only on arm64.
Compilers
~~~~~~~~~
......
......@@ -13,7 +13,7 @@
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
| loongarch: | TODO |
| loongarch: | ok |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
......
......@@ -13,7 +13,7 @@
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
| loongarch: | TODO |
| loongarch: | ok |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
......
......@@ -13,7 +13,7 @@
| csky: | TODO |
| hexagon: | ok |
| ia64: | TODO |
| loongarch: | TODO |
| loongarch: | ok |
| m68k: | TODO |
| microblaze: | ok |
| mips: | ok |
......
......@@ -42,7 +42,7 @@ KASAN有三种模式:
体系架构
~~~~~~~~
在x86_64、arm、arm64、powerpc、riscv、s390和xtensa上支持通用KASAN,
在x86_64、arm、arm64、powerpc、riscv、s390、xtensa和loongarch上支持通用KASAN,
而基于标签的KASAN模式只在arm64上支持。
编译器
......
......@@ -8,11 +8,13 @@ config LOONGARCH
select ACPI_PPTT if ACPI
select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
select ARCH_BINFMT_ELF_STATE
select ARCH_DISABLE_KASAN_INLINE
select ARCH_ENABLE_MEMORY_HOTPLUG
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_KCOV
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_SPECIAL
......@@ -91,6 +93,9 @@ config LOONGARCH
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN
select HAVE_ARCH_KFENCE
select HAVE_ARCH_KGDB if PERF_EVENTS
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
......@@ -115,6 +120,7 @@ config LOONGARCH
select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS
select HAVE_GENERIC_VDSO
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IOREMAP_PROT
......@@ -254,6 +260,9 @@ config AS_HAS_LSX_EXTENSION
config AS_HAS_LASX_EXTENSION
def_bool $(as-instr,xvld \$xr0$(comma)\$a0$(comma)0)
config AS_HAS_LBT_EXTENSION
def_bool $(as-instr,movscr2gr \$a0$(comma)\$scr0)
menu "Kernel type and options"
source "kernel/Kconfig.hz"
......@@ -534,6 +543,18 @@ config CPU_HAS_LASX
If unsure, say Y.
config CPU_HAS_LBT
bool "Support for the Loongson Binary Translation Extension"
depends on AS_HAS_LBT_EXTENSION
help
Loongson Binary Translation (LBT) introduces 4 scratch registers (SCR0
to SCR3), x86/ARM eflags (eflags) and x87 fpu stack pointer (ftop).
Enabling this option allows the kernel to allocate and switch registers
specific to LBT.
If you want to use this feature, such as the Loongson Architecture
Translator (LAT), say Y.
config CPU_HAS_PREFETCH
bool
default y
......@@ -638,6 +659,11 @@ config ARCH_MMAP_RND_BITS_MAX
config ARCH_SUPPORTS_UPROBES
def_bool y
config KASAN_SHADOW_OFFSET
hex
default 0x0
depends on KASAN
menu "Power management options"
config ARCH_SUSPEND_POSSIBLE
......
......@@ -84,7 +84,10 @@ LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext
endif
cflags-y += $(call cc-option, -mno-check-zero-division)
ifndef CONFIG_KASAN
cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset
endif
load-y = 0x9000000000200000
bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y)
......
......@@ -30,7 +30,6 @@ CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_SYSFS_DEPRECATED=y
CONFIG_RELAY=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
......@@ -47,8 +46,12 @@ CONFIG_SMP=y
CONFIG_HOTPLUG_CPU=y
CONFIG_NR_CPUS=64
CONFIG_NUMA=y
CONFIG_CPU_HAS_FPU=y
CONFIG_CPU_HAS_LSX=y
CONFIG_CPU_HAS_LASX=y
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
CONFIG_RANDOMIZE_BASE=y
CONFIG_SUSPEND=y
CONFIG_HIBERNATION=y
CONFIG_ACPI=y
......@@ -63,6 +66,7 @@ CONFIG_EFI_ZBOOT=y
CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y
CONFIG_EFI_CAPSULE_LOADER=m
CONFIG_EFI_TEST=m
CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
......@@ -108,7 +112,12 @@ CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
CONFIG_NET_IPIP=m
CONFIG_NET_IPGRE_DEMUX=m
CONFIG_NET_IPGRE=m
CONFIG_NET_IPGRE_BROADCAST=y
CONFIG_IP_MROUTE=y
CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
CONFIG_INET_ESP=m
CONFIG_INET_UDP_DIAG=y
CONFIG_TCP_CONG_ADVANCED=y
......@@ -137,7 +146,6 @@ CONFIG_NFT_MASQ=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
CONFIG_NFT_TUNNEL=m
CONFIG_NFT_OBJREF=m
CONFIG_NFT_QUEUE=m
CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
......@@ -208,7 +216,11 @@ CONFIG_IP_VS=m
CONFIG_IP_VS_IPV6=y
CONFIG_IP_VS_PROTO_TCP=y
CONFIG_IP_VS_PROTO_UDP=y
CONFIG_IP_VS_PROTO_ESP=y
CONFIG_IP_VS_PROTO_AH=y
CONFIG_IP_VS_PROTO_SCTP=y
CONFIG_IP_VS_RR=m
CONFIG_IP_VS_WRR=m
CONFIG_IP_VS_NFCT=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_DUP_IPV4=m
......@@ -227,7 +239,6 @@ CONFIG_IP_NF_TARGET_MASQUERADE=m
CONFIG_IP_NF_TARGET_NETMAP=m
CONFIG_IP_NF_TARGET_REDIRECT=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_CLUSTERIP=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IP_NF_RAW=m
......@@ -363,6 +374,8 @@ CONFIG_MTD_CFI_AMDSTD=m
CONFIG_MTD_CFI_STAA=m
CONFIG_MTD_RAM=m
CONFIG_MTD_ROM=m
CONFIG_MTD_UBI=m
CONFIG_MTD_UBI_BLOCK=y
CONFIG_PARPORT=y
CONFIG_PARPORT_PC=y
CONFIG_PARPORT_SERIAL=y
......@@ -370,6 +383,7 @@ CONFIG_PARPORT_PC_FIFO=y
CONFIG_ZRAM=m
CONFIG_ZRAM_DEF_COMP_ZSTD=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=8192
......@@ -516,6 +530,8 @@ CONFIG_STMMAC_ETH=y
# CONFIG_NET_VENDOR_TEHUTI is not set
# CONFIG_NET_VENDOR_TI is not set
# CONFIG_NET_VENDOR_VIA is not set
CONFIG_NGBE=y
CONFIG_TXGBE=y
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_NET_VENDOR_XILINX is not set
CONFIG_PPP=m
......@@ -602,9 +618,15 @@ CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_PIIX4=y
CONFIG_I2C_GPIO=y
CONFIG_I2C_LS2X=y
CONFIG_SPI=y
CONFIG_SPI_LOONGSON_PCI=m
CONFIG_SPI_LOONGSON_PLATFORM=m
CONFIG_PINCTRL=y
CONFIG_PINCTRL_LOONGSON2=y
CONFIG_GPIO_SYSFS=y
CONFIG_GPIO_LOONGSON=y
CONFIG_GPIO_LOONGSON_64BIT=y
CONFIG_POWER_RESET=y
CONFIG_POWER_RESET_RESTART=y
CONFIG_POWER_RESET_SYSCON=y
......@@ -614,6 +636,7 @@ CONFIG_SENSORS_LM75=m
CONFIG_SENSORS_LM93=m
CONFIG_SENSORS_W83795=m
CONFIG_SENSORS_W83627HF=m
CONFIG_LOONGSON2_THERMAL=m
CONFIG_RC_CORE=m
CONFIG_LIRC=y
CONFIG_RC_DECODERS=y
......@@ -643,6 +666,7 @@ CONFIG_DRM_AMDGPU_USERPTR=y
CONFIG_DRM_AST=y
CONFIG_DRM_QXL=m
CONFIG_DRM_VIRTIO_GPU=m
CONFIG_DRM_LOONGSON=y
CONFIG_FB=y
CONFIG_FB_EFI=y
CONFIG_FB_RADEON=y
......@@ -712,6 +736,7 @@ CONFIG_UCSI_ACPI=m
CONFIG_INFINIBAND=m
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_EFI=y
CONFIG_RTC_DRV_LOONGSON=y
CONFIG_DMADEVICES=y
CONFIG_UIO=m
CONFIG_UIO_PDRV_GENIRQ=m
......@@ -745,7 +770,9 @@ CONFIG_COMEDI_NI_LABPC_PCI=m
CONFIG_COMEDI_NI_PCIDIO=m
CONFIG_COMEDI_NI_PCIMIO=m
CONFIG_STAGING=y
CONFIG_R8188EU=m
CONFIG_COMMON_CLK_LOONGSON2=y
CONFIG_LOONGSON2_GUTS=y
CONFIG_LOONGSON2_PM=y
CONFIG_PM_DEVFREQ=y
CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
CONFIG_DEVFREQ_GOV_PERFORMANCE=y
......@@ -759,10 +786,17 @@ CONFIG_EXT2_FS_SECURITY=y
CONFIG_EXT3_FS=y
CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_EXT3_FS_SECURITY=y
CONFIG_JFS_FS=m
CONFIG_JFS_POSIX_ACL=y
CONFIG_JFS_SECURITY=y
CONFIG_XFS_FS=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_GFS2_FS=m
CONFIG_GFS2_FS_LOCKING_DLM=y
CONFIG_OCFS2_FS=m
CONFIG_BTRFS_FS=y
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_FANOTIFY=y
CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
CONFIG_QUOTA=y
......@@ -771,11 +805,14 @@ CONFIG_QFMT_V1=m
CONFIG_QFMT_V2=m
CONFIG_AUTOFS_FS=y
CONFIG_FUSE_FS=m
CONFIG_CUSE=m
CONFIG_VIRTIO_FS=m
CONFIG_OVERLAY_FS=y
CONFIG_OVERLAY_FS_INDEX=y
CONFIG_OVERLAY_FS_XINO_AUTO=y
CONFIG_OVERLAY_FS_METACOPY=y
CONFIG_FSCACHE=y
CONFIG_CACHEFILES=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
......@@ -784,19 +821,42 @@ CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
CONFIG_FAT_DEFAULT_CODEPAGE=936
CONFIG_FAT_DEFAULT_IOCHARSET="gb2312"
CONFIG_EXFAT_FS=m
CONFIG_NTFS3_FS=m
CONFIG_NTFS3_64BIT_CLUSTER=y
CONFIG_NTFS3_LZX_XPRESS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_CONFIGFS_FS=y
CONFIG_ORANGEFS_FS=m
CONFIG_ECRYPT_FS=m
CONFIG_ECRYPT_FS_MESSAGING=y
CONFIG_HFS_FS=m
CONFIG_HFSPLUS_FS=m
CONFIG_UBIFS_FS=m
CONFIG_UBIFS_FS_ADVANCED_COMPR=y
CONFIG_CRAMFS=m
CONFIG_SQUASHFS=y
CONFIG_SQUASHFS_XATTR=y
CONFIG_SQUASHFS_LZ4=y
CONFIG_SQUASHFS_LZO=y
CONFIG_SQUASHFS_XZ=y
CONFIG_MINIX_FS=m
CONFIG_ROMFS_FS=m
CONFIG_PSTORE=m
CONFIG_PSTORE_LZO_COMPRESS=m
CONFIG_PSTORE_LZ4_COMPRESS=m
CONFIG_PSTORE_LZ4HC_COMPRESS=m
CONFIG_PSTORE_842_COMPRESS=y
CONFIG_PSTORE_ZSTD_COMPRESS=y
CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y
CONFIG_SYSV_FS=m
CONFIG_UFS_FS=m
CONFIG_EROFS_FS=m
CONFIG_EROFS_FS_ZIP_LZMA=y
CONFIG_EROFS_FS_PCPU_KTHREAD=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
......@@ -807,6 +867,10 @@ CONFIG_NFSD=y
CONFIG_NFSD_V3_ACL=y
CONFIG_NFSD_V4=y
CONFIG_NFSD_BLOCKLAYOUT=y
CONFIG_CEPH_FS=m
CONFIG_CEPH_FSCACHE=y
CONFIG_CEPH_FS_POSIX_ACL=y
CONFIG_CEPH_FS_SECURITY_LABEL=y
CONFIG_CIFS=m
# CONFIG_CIFS_DEBUG is not set
CONFIG_9P_FS=y
......@@ -814,6 +878,7 @@ CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_936=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_UTF8=y
CONFIG_DLM=m
CONFIG_KEY_DH_OPERATIONS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_SELINUX=y
......@@ -847,6 +912,7 @@ CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_CRC32_LOONGARCH=m
CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_PRINTK_TIME=y
CONFIG_STRIP_ASM_SYMS=y
......
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/uaccess.h>
#include <asm/fpu.h>
#include <asm/lbt.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/ftrace.h>
......
......@@ -10,113 +10,6 @@
#include <asm/fpregdef.h>
#include <asm/loongarch.h>
.macro parse_v var val
\var = \val
.endm
.macro parse_r var r
\var = -1
.ifc \r, $r0
\var = 0
.endif
.ifc \r, $r1
\var = 1
.endif
.ifc \r, $r2
\var = 2
.endif
.ifc \r, $r3
\var = 3
.endif
.ifc \r, $r4
\var = 4
.endif
.ifc \r, $r5
\var = 5
.endif
.ifc \r, $r6
\var = 6
.endif
.ifc \r, $r7
\var = 7
.endif
.ifc \r, $r8
\var = 8
.endif
.ifc \r, $r9
\var = 9
.endif
.ifc \r, $r10
\var = 10
.endif
.ifc \r, $r11
\var = 11
.endif
.ifc \r, $r12
\var = 12
.endif
.ifc \r, $r13
\var = 13
.endif
.ifc \r, $r14
\var = 14
.endif
.ifc \r, $r15
\var = 15
.endif
.ifc \r, $r16
\var = 16
.endif
.ifc \r, $r17
\var = 17
.endif
.ifc \r, $r18
\var = 18
.endif
.ifc \r, $r19
\var = 19
.endif
.ifc \r, $r20
\var = 20
.endif
.ifc \r, $r21
\var = 21
.endif
.ifc \r, $r22
\var = 22
.endif
.ifc \r, $r23
\var = 23
.endif
.ifc \r, $r24
\var = 24
.endif
.ifc \r, $r25
\var = 25
.endif
.ifc \r, $r26
\var = 26
.endif
.ifc \r, $r27
\var = 27
.endif
.ifc \r, $r28
\var = 28
.endif
.ifc \r, $r29
\var = 29
.endif
.ifc \r, $r30
\var = 30
.endif
.ifc \r, $r31
\var = 31
.endif
.iflt \var
.error "Unable to parse register name \r"
.endif
.endm
.macro cpu_save_nonscratch thread
stptr.d s0, \thread, THREAD_REG23
stptr.d s1, \thread, THREAD_REG24
......@@ -148,12 +41,51 @@
.macro fpu_save_csr thread tmp
movfcsr2gr \tmp, fcsr0
stptr.w \tmp, \thread, THREAD_FCSR
stptr.w \tmp, \thread, THREAD_FCSR
#ifdef CONFIG_CPU_HAS_LBT
/* TM bit is always 0 if LBT not supported */
andi \tmp, \tmp, FPU_CSR_TM
beqz \tmp, 1f
/* Save FTOP */
x86mftop \tmp
stptr.w \tmp, \thread, THREAD_FTOP
/* Turn off TM to ensure the order of FPR in memory independent of TM */
x86clrtm
1:
#endif
.endm
.macro fpu_restore_csr thread tmp
ldptr.w \tmp, \thread, THREAD_FCSR
movgr2fcsr fcsr0, \tmp
.macro fpu_restore_csr thread tmp0 tmp1
ldptr.w \tmp0, \thread, THREAD_FCSR
movgr2fcsr fcsr0, \tmp0
#ifdef CONFIG_CPU_HAS_LBT
/* TM bit is always 0 if LBT not supported */
andi \tmp0, \tmp0, FPU_CSR_TM
beqz \tmp0, 2f
/* Restore FTOP */
ldptr.w \tmp0, \thread, THREAD_FTOP
andi \tmp0, \tmp0, 0x7
la.pcrel \tmp1, 1f
alsl.d \tmp1, \tmp0, \tmp1, 3
jr \tmp1
1:
x86mttop 0
b 2f
x86mttop 1
b 2f
x86mttop 2
b 2f
x86mttop 3
b 2f
x86mttop 4
b 2f
x86mttop 5
b 2f
x86mttop 6
b 2f
x86mttop 7
2:
#endif
.endm
.macro fpu_save_cc thread tmp0 tmp1
......@@ -353,7 +285,7 @@
.macro lsx_restore_all thread tmp0 tmp1
lsx_restore_data \thread, \tmp0
fpu_restore_cc \thread, \tmp0, \tmp1
fpu_restore_csr \thread, \tmp0
fpu_restore_csr \thread, \tmp0, \tmp1
.endm
.macro lsx_save_upper vd base tmp off
......@@ -563,7 +495,7 @@
.macro lasx_restore_all thread tmp0 tmp1
lasx_restore_data \thread, \tmp0
fpu_restore_cc \thread, \tmp0, \tmp1
fpu_restore_csr \thread, \tmp0
fpu_restore_csr \thread, \tmp0, \tmp1
.endm
.macro lasx_save_upper xd base tmp off
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_KASAN_H
#define __ASM_KASAN_H
#ifndef __ASSEMBLY__
#include <linux/linkage.h>
#include <linux/mmzone.h>
#include <asm/addrspace.h>
#include <asm/io.h>
#include <asm/pgtable.h>
#define __HAVE_ARCH_SHADOW_MAP
#define KASAN_SHADOW_SCALE_SHIFT 3
#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
#define XRANGE_SHIFT (48)
/* Valid address length */
#define XRANGE_SHADOW_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
/* Used for taking out the valid address */
#define XRANGE_SHADOW_MASK GENMASK_ULL(XRANGE_SHADOW_SHIFT - 1, 0)
/* One segment whole address space size */
#define XRANGE_SIZE (XRANGE_SHADOW_MASK + 1)
/* 64-bit segment value. */
#define XKPRANGE_UC_SEG (0x8000)
#define XKPRANGE_CC_SEG (0x9000)
#define XKVRANGE_VC_SEG (0xffff)
/* Cached */
#define XKPRANGE_CC_START CACHE_BASE
#define XKPRANGE_CC_SIZE XRANGE_SIZE
#define XKPRANGE_CC_KASAN_OFFSET (0)
#define XKPRANGE_CC_SHADOW_SIZE (XKPRANGE_CC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
#define XKPRANGE_CC_SHADOW_END (XKPRANGE_CC_KASAN_OFFSET + XKPRANGE_CC_SHADOW_SIZE)
/* UnCached */
#define XKPRANGE_UC_START UNCACHE_BASE
#define XKPRANGE_UC_SIZE XRANGE_SIZE
#define XKPRANGE_UC_KASAN_OFFSET XKPRANGE_CC_SHADOW_END
#define XKPRANGE_UC_SHADOW_SIZE (XKPRANGE_UC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
#define XKPRANGE_UC_SHADOW_END (XKPRANGE_UC_KASAN_OFFSET + XKPRANGE_UC_SHADOW_SIZE)
/* VMALLOC (Cached or UnCached) */
#define XKVRANGE_VC_START MODULES_VADDR
#define XKVRANGE_VC_SIZE round_up(KFENCE_AREA_END - MODULES_VADDR + 1, PGDIR_SIZE)
#define XKVRANGE_VC_KASAN_OFFSET XKPRANGE_UC_SHADOW_END
#define XKVRANGE_VC_SHADOW_SIZE (XKVRANGE_VC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
#define XKVRANGE_VC_SHADOW_END (XKVRANGE_VC_KASAN_OFFSET + XKVRANGE_VC_SHADOW_SIZE)
/* KAsan shadow memory start right after vmalloc. */
#define KASAN_SHADOW_START round_up(KFENCE_AREA_END, PGDIR_SIZE)
#define KASAN_SHADOW_SIZE (XKVRANGE_VC_SHADOW_END - XKPRANGE_CC_KASAN_OFFSET)
#define KASAN_SHADOW_END round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE)
#define XKPRANGE_CC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_CC_KASAN_OFFSET)
#define XKPRANGE_UC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_UC_KASAN_OFFSET)
#define XKVRANGE_VC_SHADOW_OFFSET (KASAN_SHADOW_START + XKVRANGE_VC_KASAN_OFFSET)
extern bool kasan_early_stage;
extern unsigned char kasan_early_shadow_page[PAGE_SIZE];
#define kasan_arch_is_ready kasan_arch_is_ready
/*
 * Report whether the KASAN shadow mappings are usable yet.  Until
 * kasan_init() finishes, shadow lookups must be redirected to the
 * shared early shadow page (see kasan_mem_to_shadow()).
 */
static __always_inline bool kasan_arch_is_ready(void)
{
	return kasan_early_stage == false;
}
/*
 * Translate a kernel virtual address to its KASAN shadow address.
 *
 * Before kasan_init() completes, every lookup resolves to the shared
 * early shadow page.  Afterwards, the top 16 bits of the address select
 * the segment (cached XKPRANGE, uncached XKPRANGE, or XKVRANGE vmalloc)
 * and a per-segment shadow base offset is added to the scaled in-segment
 * bits.  Addresses outside the three known segments have no shadow:
 * WARN and return NULL.
 */
static inline void *kasan_mem_to_shadow(const void *addr)
{
	if (!kasan_arch_is_ready()) {
		return (void *)(kasan_early_shadow_page);
	} else {
		unsigned long maddr = (unsigned long)addr;
		/* Segment selector: bits 63..48 of the virtual address. */
		unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff;
		unsigned long offset = 0;

		/* Keep only the in-segment part of the address. */
		maddr &= XRANGE_SHADOW_MASK;
		switch (xrange) {
		case XKPRANGE_CC_SEG:
			offset = XKPRANGE_CC_SHADOW_OFFSET;
			break;
		case XKPRANGE_UC_SEG:
			offset = XKPRANGE_UC_SHADOW_OFFSET;
			break;
		case XKVRANGE_VC_SEG:
			offset = XKVRANGE_VC_SHADOW_OFFSET;
			break;
		default:
			WARN_ON(1);
			return NULL;
		}

		/* KASAN_SHADOW_SCALE_SHIFT == 3: 8 bytes map to 1 shadow byte. */
		return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset);
	}
}
/*
 * Inverse of kasan_mem_to_shadow(): map a shadow address back to the
 * kernel virtual address it covers.  The shadow regions for the three
 * segments are laid out consecutively (CC, then UC, then VC), so the
 * owning segment is recovered by comparing against each region's start
 * offset from highest to lowest.  Shadow addresses outside
 * [KASAN_SHADOW_START, KASAN_SHADOW_END] WARN and return NULL.
 */
static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
{
	unsigned long addr = (unsigned long)shadow_addr;

	if (unlikely(addr > KASAN_SHADOW_END) ||
		unlikely(addr < KASAN_SHADOW_START)) {
		WARN_ON(1);
		return NULL;
	}

	if (addr >= XKVRANGE_VC_SHADOW_OFFSET)
		return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START);
	else if (addr >= XKPRANGE_UC_SHADOW_OFFSET)
		return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START);
	else if (addr >= XKPRANGE_CC_SHADOW_OFFSET)
		return (void *)(((addr - XKPRANGE_CC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_CC_START);
	else {
		WARN_ON(1);
		return NULL;
	}
}
void kasan_init(void);
asmlinkage void kasan_early_init(void);
#endif
#endif
/* SPDX-License-Identifier: GPL-2.0 */
/*
* KFENCE support for LoongArch.
*
* Author: Enze Li <lienze@kylinos.cn>
* Copyright (C) 2022-2023 KylinSoft Corporation.
*/
#ifndef _ASM_LOONGARCH_KFENCE_H
#define _ASM_LOONGARCH_KFENCE_H
#include <linux/kfence.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
/*
 * Relocate the KFENCE pool into the dedicated KFENCE_AREA window.
 *
 * A vm_struct covering [KFENCE_AREA_START, KFENCE_AREA_END] is
 * reserved, the original pool's physical pages are remapped there with
 * ioremap_page_range(), and __kfence_pool is repointed at the new
 * virtual address.  On any failure the vm area is released and
 * __kfence_pool is restored, so the caller observes unchanged state.
 */
static inline bool arch_kfence_init_pool(void)
{
	int err;
	char *kfence_pool = __kfence_pool;	/* keep original for rollback */
	struct vm_struct *area;

	area = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP,
				    KFENCE_AREA_START, KFENCE_AREA_END,
				    __builtin_return_address(0));
	if (!area)
		return false;

	__kfence_pool = (char *)area->addr;
	err = ioremap_page_range((unsigned long)__kfence_pool,
				 (unsigned long)__kfence_pool + KFENCE_POOL_SIZE,
				 virt_to_phys((void *)kfence_pool), PAGE_KERNEL);
	if (err) {
		free_vm_area(area);
		__kfence_pool = kfence_pool;
		return false;
	}

	return true;
}
/*
 * Protect or unprotect the page at @addr by toggling its valid/present
 * PTE bits, then flush the local TLB entry.  Returns false when no PTE
 * backs the address.
 */
static inline bool kfence_protect_page(unsigned long addr, bool protect)
{
	unsigned long val;
	pte_t *ptep = virt_to_kpte(addr);

	if (WARN_ON(!ptep) || pte_none(*ptep))
		return false;

	val = pte_val(*ptep);
	if (protect)
		val &= ~(_PAGE_VALID | _PAGE_PRESENT);
	else
		val |= _PAGE_VALID | _PAGE_PRESENT;
	set_pte(ptep, __pte(val));

	preempt_disable();
	local_flush_tlb_one(addr);
	preempt_enable();

	return true;
}
#endif /* _ASM_LOONGARCH_KFENCE_H */
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2023 Loongson Technology Corporation Limited
 */
#ifndef _ASM_LOONGARCH_KGDB_H
#define _ASM_LOONGARCH_KGDB_H

#define GDB_SIZEOF_REG sizeof(u64)

/* gdb remote protocol expects the following register layout. */

/*
 * General purpose registers:
 * r0-r31: 64 bit
 * orig_a0: 64 bit
 * pc : 64 bit
 * csr_badvaddr: 64 bit
 */
#define DBG_PT_REGS_BASE 0
#define DBG_PT_REGS_NUM 35
#define DBG_PT_REGS_END (DBG_PT_REGS_BASE + DBG_PT_REGS_NUM - 1)

/*
 * Floating point registers:
 * f0-f31: 64 bit
 */
#define DBG_FPR_BASE (DBG_PT_REGS_END + 1)
#define DBG_FPR_NUM 32
#define DBG_FPR_END (DBG_FPR_BASE + DBG_FPR_NUM - 1)

/*
 * Condition flag registers:
 * fcc0-fcc7: 8 bit
 */
#define DBG_FCC_BASE (DBG_FPR_END + 1)
#define DBG_FCC_NUM 8
#define DBG_FCC_END (DBG_FCC_BASE + DBG_FCC_NUM - 1)

/*
 * Floating-point Control and Status register:
 * fcsr: 32 bit
 */
#define DBG_FCSR_NUM 1
#define DBG_FCSR (DBG_FCC_END + 1)

#define DBG_MAX_REG_NUM (DBG_FCSR + 1)

/*
 * Size of the I/O buffer for a gdb packet; chosen large enough to hold
 * the contents of all registers in a single reply.
 */
#define BUFMAX 2048

/*
 * Number of bytes required for the gdb_regs buffer.
 * PT_REGS and FPR: 8 bytes each; FCSR: 4 bytes; FCC: 1 byte each.
 * GDB fails to connect for sizes beyond this with the error
 * "'g' packet reply is too long".
 */
#define NUMREGBYTES ((DBG_PT_REGS_NUM + DBG_FPR_NUM) * GDB_SIZEOF_REG + DBG_FCC_NUM * 1 + DBG_FCSR_NUM * 4)

#define BREAK_INSTR_SIZE 4
#define CACHE_FLUSH_IS_SAFE 0

/* Register numbers of various important registers. */
enum dbg_loongarch_regnum {
	DBG_LOONGARCH_ZERO = 0,
	DBG_LOONGARCH_RA,
	DBG_LOONGARCH_TP,
	DBG_LOONGARCH_SP,
	DBG_LOONGARCH_A0,
	DBG_LOONGARCH_FP = 22,
	DBG_LOONGARCH_S0,
	DBG_LOONGARCH_S1,
	DBG_LOONGARCH_S2,
	DBG_LOONGARCH_S3,
	DBG_LOONGARCH_S4,
	DBG_LOONGARCH_S5,
	DBG_LOONGARCH_S6,
	DBG_LOONGARCH_S7,
	DBG_LOONGARCH_S8,
	DBG_LOONGARCH_ORIG_A0,
	DBG_LOONGARCH_PC,
	DBG_LOONGARCH_BADV
};

void kgdb_breakinst(void);
void arch_kgdb_breakpoint(void);

#ifdef CONFIG_KGDB
bool kgdb_breakpoint_handler(struct pt_regs *regs);
#else /* !CONFIG_KGDB */
static inline bool kgdb_breakpoint_handler(struct pt_regs *regs) { return false; }
#endif /* CONFIG_KGDB */

#endif /* _ASM_LOONGARCH_KGDB_H */
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Author: Qi Hu <huqi@loongson.cn>
* Huacai Chen <chenhuacai@loongson.cn>
* Copyright (C) 2020-2023 Loongson Technology Corporation Limited
*/
#ifndef _ASM_LBT_H
#define _ASM_LBT_H
#include <asm/cpu.h>
#include <asm/current.h>
#include <asm/loongarch.h>
#include <asm/processor.h>
extern void _init_lbt(void);
extern void _save_lbt(struct loongarch_lbt *);
extern void _restore_lbt(struct loongarch_lbt *);
/*
 * Report whether the LBT extension is currently enabled in CSR.EUEN.
 * Always 0 on CPUs without LBT support.
 */
static inline int is_lbt_enabled(void)
{
	if (!cpu_has_lbt)
		return 0;

	return !!(csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LBTEN);
}
/* True if the current task currently owns the live LBT hardware state. */
static inline int is_lbt_owner(void)
{
	return test_thread_flag(TIF_USEDLBT);
}
#ifdef CONFIG_CPU_HAS_LBT
/* Turn on the LBT unit for this CPU by setting EUEN.LBTEN. */
static inline void enable_lbt(void)
{
	if (cpu_has_lbt)
		csr_xchg32(CSR_EUEN_LBTEN, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN);
}

/* Turn off the LBT unit for this CPU by clearing EUEN.LBTEN. */
static inline void disable_lbt(void)
{
	if (cpu_has_lbt)
		csr_xchg32(0, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN);
}
/*
 * Mark the current task as the LBT owner: enable the unit, set the
 * ownership thread flag, and record LBTEN in the task's saved EUEN so
 * the unit stays enabled across context switches back to this task.
 */
static inline void __own_lbt(void)
{
	enable_lbt();
	set_thread_flag(TIF_USEDLBT);
	KSTK_EUEN(current) |= CSR_EUEN_LBTEN;
}
/*
 * Take ownership of the LBT unit for the current task; the caller must
 * not be preemptible.  When @restore is non-zero, the task's saved LBT
 * context is reloaded into the hardware registers.  No-op if the CPU
 * lacks LBT or the task already owns the unit.
 */
static inline void own_lbt_inatomic(int restore)
{
	if (!cpu_has_lbt || is_lbt_owner())
		return;

	__own_lbt();
	if (restore)
		_restore_lbt(&current->thread.lbt);
}
/* Preemption-safe wrapper around own_lbt_inatomic(). */
static inline void own_lbt(int restore)
{
	preempt_disable();
	own_lbt_inatomic(restore);
	preempt_enable();
}
/*
 * Release LBT ownership for @tsk; the caller must not be preemptible.
 * When @save is non-zero, the hardware LBT state is written back into
 * the task's thread struct first.  LBTEN is cleared from the saved
 * EUEN unconditionally so the unit stays off until explicitly re-owned.
 */
static inline void lose_lbt_inatomic(int save, struct task_struct *tsk)
{
	if (cpu_has_lbt && is_lbt_owner()) {
		if (save)
			_save_lbt(&tsk->thread.lbt);
		disable_lbt();
		clear_tsk_thread_flag(tsk, TIF_USEDLBT);
	}
	KSTK_EUEN(tsk) &= ~(CSR_EUEN_LBTEN);
}
/* Preemption-safe wrapper around lose_lbt_inatomic() for current. */
static inline void lose_lbt(int save)
{
	preempt_disable();
	lose_lbt_inatomic(save, current);
	preempt_enable();
}

/* First-use setup: claim the LBT unit for current and reset its state. */
static inline void init_lbt(void)
{
	__own_lbt();
	_init_lbt();
}
#else
/* Stubs for kernels built without CONFIG_CPU_HAS_LBT: no LBT state exists. */
static inline void own_lbt_inatomic(int restore) {}
static inline void lose_lbt_inatomic(int save, struct task_struct *tsk) {}
static inline void init_lbt(void) {}
static inline void lose_lbt(int save) {}
#endif
/*
 * Whether this thread has live LBT context (TIF_LBT_CTX_LIVE) that must
 * be preserved; always 0 on CPUs without LBT support.
 */
static inline int thread_lbt_context_live(void)
{
	if (!cpu_has_lbt)
		return 0;

	return test_thread_flag(TIF_LBT_CTX_LIVE);
}
#endif /* _ASM_LBT_H */
......@@ -12,49 +12,6 @@
#ifndef __ASSEMBLY__
#include <larchintrin.h>
/*
* parse_r var, r - Helper assembler macro for parsing register names.
*
* This converts the register name in $n form provided in \r to the
* corresponding register number, which is assigned to the variable \var. It is
* needed to allow explicit encoding of instructions in inline assembly where
* registers are chosen by the compiler in $n form, allowing us to avoid using
* fixed register numbers.
*
* It also allows newer instructions (not implemented by the assembler) to be
* transparently implemented using assembler macros, instead of needing separate
* cases depending on toolchain support.
*
* Simple usage example:
* __asm__ __volatile__("parse_r addr, %0\n\t"
* "#invtlb op, 0, %0\n\t"
* ".word ((0x6498000) | (addr << 10) | (0 << 5) | op)"
* : "=r" (status);
*/
/* Match an individual register number and assign to \var */
#define _IFC_REG(n) \
".ifc \\r, $r" #n "\n\t" \
"\\var = " #n "\n\t" \
".endif\n\t"
__asm__(".macro parse_r var r\n\t"
"\\var = -1\n\t"
_IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3)
_IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7)
_IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11)
_IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15)
_IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19)
_IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23)
_IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27)
_IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)
".iflt \\var\n\t"
".error \"Unable to parse register name \\r\"\n\t"
".endif\n\t"
".endm");
#undef _IFC_REG
/* CPUCFG */
#define read_cpucfg(reg) __cpucfg(reg)
......@@ -1453,6 +1410,10 @@ __BUILD_CSR_OP(tlbidx)
#define FPU_CSR_RU 0x200 /* towards +Infinity */
#define FPU_CSR_RD 0x300 /* towards -Infinity */
/* Bit 6 of FPU Status Register specify the LBT TOP simulation mode */
#define FPU_CSR_TM_SHIFT 0x6
#define FPU_CSR_TM (_ULCAST_(1) << FPU_CSR_TM_SHIFT)
#define read_fcsr(source) \
({ \
unsigned int __res; \
......
......@@ -13,6 +13,4 @@ extern struct pglist_data *node_data[];
#define NODE_DATA(nid) (node_data[(nid)])
extern void setup_zero_pages(void);
#endif /* _ASM_MMZONE_H_ */
......@@ -84,7 +84,12 @@ typedef struct { unsigned long pgprot; } pgprot_t;
#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x))
#define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr))
#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
#define virt_to_page(kaddr) \
({ \
(likely((unsigned long)kaddr < vm_map_base)) ? \
dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\
})
extern int __virt_addr_valid(volatile void *kaddr);
#define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
......
......@@ -94,4 +94,5 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
#endif /* __PAGETABLE_PUD_FOLDED */
extern pte_t * __init populate_kernel_pte(unsigned long addr);
#endif /* _ASM_PGALLOC_H */
......@@ -70,12 +70,9 @@ struct vm_area_struct;
* for zero-mapped memory areas etc..
*/
extern unsigned long empty_zero_page;
extern unsigned long zero_page_mask;
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define ZERO_PAGE(vaddr) \
(virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
#define __HAVE_COLOR_ZERO_PAGE
#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page)
/*
* TLB refill handlers may also map the vmalloc area into xkvrange.
......@@ -85,14 +82,30 @@ extern unsigned long zero_page_mask;
#define MODULES_VADDR (vm_map_base + PCI_IOSIZE + (2 * PAGE_SIZE))
#define MODULES_END (MODULES_VADDR + SZ_256M)
#ifdef CONFIG_KFENCE
#define KFENCE_AREA_SIZE (((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE)
#else
#define KFENCE_AREA_SIZE 0
#endif
#define VMALLOC_START MODULES_END
#ifndef CONFIG_KASAN
#define VMALLOC_END \
(vm_map_base + \
min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
#else
#define VMALLOC_END \
(vm_map_base + \
min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits) / 2) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
#endif
#define vmemmap ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
#define VMEMMAP_END ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
#define KFENCE_AREA_START (VMEMMAP_END + 1)
#define KFENCE_AREA_END (KFENCE_AREA_START + KFENCE_AREA_SIZE - 1)
#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
#ifndef __PAGETABLE_PMD_FOLDED
......@@ -350,6 +363,9 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
extern pgd_t swapper_pg_dir[];
extern pgd_t invalid_pg_dir[];
struct page *dmw_virt_to_page(unsigned long kaddr);
struct page *tlb_virt_to_page(unsigned long kaddr);
/*
* The following only work if pte_present() is true.
* Undefined behaviour if not..
......@@ -596,6 +612,9 @@ static inline long pmd_protnone(pmd_t pmd)
}
#endif /* CONFIG_NUMA_BALANCING */
#define pmd_leaf(pmd) ((pmd_val(pmd) & _PAGE_HUGE) != 0)
#define pud_leaf(pud) ((pud_val(pud) & _PAGE_HUGE) != 0)
/*
* We provide our own get_unmapped area to cope with the virtual aliasing
* constraints placed on us by the cache architecture.
......
......@@ -80,11 +80,22 @@ BUILD_FPR_ACCESS(32)
BUILD_FPR_ACCESS(64)
struct loongarch_fpu {
unsigned int fcsr;
uint64_t fcc; /* 8x8 */
uint32_t fcsr;
uint32_t ftop;
union fpureg fpr[NUM_FPU_REGS];
};
struct loongarch_lbt {
/* Scratch registers */
unsigned long scr0;
unsigned long scr1;
unsigned long scr2;
unsigned long scr3;
/* Eflags register */
unsigned long eflags;
};
#define INIT_CPUMASK { \
{0,} \
}
......@@ -113,15 +124,6 @@ struct thread_struct {
unsigned long csr_ecfg;
unsigned long csr_badvaddr; /* Last user fault */
/* Scratch registers */
unsigned long scr0;
unsigned long scr1;
unsigned long scr2;
unsigned long scr3;
/* Eflags register */
unsigned long eflags;
/* Other stuff associated with the thread. */
unsigned long trap_nr;
unsigned long error_code;
......@@ -133,6 +135,7 @@ struct thread_struct {
* context because they are conditionally copied at fork().
*/
struct loongarch_fpu fpu FPU_ALIGN;
struct loongarch_lbt lbt; /* Also conditionally copied */
/* Hardware breakpoints pinned to this task. */
struct perf_event *hbp_break[LOONGARCH_MAX_BRP];
......@@ -174,8 +177,9 @@ struct thread_struct {
* FPU & vector registers \
*/ \
.fpu = { \
.fcsr = 0, \
.fcc = 0, \
.fcsr = 0, \
.ftop = 0, \
.fpr = {{{0,},},}, \
}, \
.hbp_break = {0}, \
......
......@@ -7,6 +7,7 @@
#define _LOONGARCH_SETUP_H
#include <linux/types.h>
#include <asm/sections.h>
#include <uapi/asm/setup.h>
#define VECSIZE 0x200
......@@ -33,8 +34,13 @@ extern long __la_abs_end;
extern long __rela_dyn_begin;
extern long __rela_dyn_end;
extern void * __init relocate_kernel(void);
extern unsigned long __init relocate_kernel(void);
#endif
static inline unsigned long kaslr_offset(void)
{
return (unsigned long)&_text - VMLINUX_LOAD_ADDRESS;
}
#endif /* __SETUP_H */
......@@ -158,6 +158,10 @@
cfi_st u0, PT_R21, \docfi
csrrd u0, PERCPU_BASE_KS
9:
#ifdef CONFIG_KGDB
li.w t0, CSR_CRMD_WE
csrxchg t0, t0, LOONGARCH_CSR_CRMD
#endif
.endm
.macro SAVE_ALL docfi=0
......
......@@ -7,11 +7,31 @@
/* Architecture-optimized mem* routines; __-prefixed aliases bypass KASAN */
#define __HAVE_ARCH_MEMSET
extern void *memset(void *__s, int __c, size_t __count);
extern void *__memset(void *__s, int __c, size_t __count);
#define __HAVE_ARCH_MEMCPY
extern void *memcpy(void *__to, __const__ void *__from, size_t __n);
extern void *__memcpy(void *__to, __const__ void *__from, size_t __n);
#define __HAVE_ARCH_MEMMOVE
extern void *memmove(void *__dest, __const__ void *__src, size_t __n);
extern void *__memmove(void *__dest, __const__ void *__src, size_t __n);
#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
/*
 * Files compiled without KASAN instrumentation (e.g. mm/slub.c) must
 * use the non-instrumented versions of the mem* functions, otherwise
 * they would trip shadow-memory checks they are not prepared for.
 */
#define memset(s, c, n) __memset(s, c, n)
#define memcpy(dst, src, len) __memcpy(dst, src, len)
#define memmove(dst, src, len) __memmove(dst, src, len)
#ifndef __NO_FORTIFY
#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
#endif
#endif
#endif /* _ASM_STRING_H */
......@@ -7,6 +7,7 @@
#include <asm/cpu-features.h>
#include <asm/fpu.h>
#include <asm/lbt.h>
struct task_struct;
......@@ -34,6 +35,7 @@ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev,
#define switch_to(prev, next, last) \
do { \
lose_fpu_inatomic(1, prev); \
lose_lbt_inatomic(1, prev); \
hw_breakpoint_thread_switch(next); \
(last) = __switch_to(prev, next, task_thread_info(next), \
__builtin_return_address(0), __builtin_frame_address(0)); \
......
......@@ -84,6 +84,8 @@ register unsigned long current_stack_pointer __asm__("$sp");
#define TIF_SINGLESTEP 16 /* Single Step */
#define TIF_LSX_CTX_LIVE 17 /* LSX context must be preserved */
#define TIF_LASX_CTX_LIVE 18 /* LASX context must be preserved */
#define TIF_USEDLBT 19 /* LBT was used by this task this quantum (SMP) */
#define TIF_LBT_CTX_LIVE 20 /* LBT context must be preserved */
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
......@@ -101,6 +103,8 @@ register unsigned long current_stack_pointer __asm__("$sp");
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
#define _TIF_LSX_CTX_LIVE (1<<TIF_LSX_CTX_LIVE)
#define _TIF_LASX_CTX_LIVE (1<<TIF_LASX_CTX_LIVE)
#define _TIF_USEDLBT (1<<TIF_USEDLBT)
#define _TIF_LBT_CTX_LIVE (1<<TIF_LBT_CTX_LIVE)
#endif /* __KERNEL__ */
#endif /* _ASM_THREAD_INFO_H */
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * LoongArch SIMD (LSX/LASX) XOR templates for the RAID/xor framework.
 *
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 */
#ifndef _ASM_LOONGARCH_XOR_H
#define _ASM_LOONGARCH_XOR_H
#include <asm/cpu-features.h>
#include <asm/xor_simd.h>
#ifdef CONFIG_CPU_HAS_LSX
/* 128-bit vector (LSX) XOR template; routines defined in xor_simd.h */
static struct xor_block_template xor_block_lsx = {
.name = "lsx",
.do_2 = xor_lsx_2,
.do_3 = xor_lsx_3,
.do_4 = xor_lsx_4,
.do_5 = xor_lsx_5,
};
/* Benchmark the LSX template only when the CPU actually has LSX */
#define XOR_SPEED_LSX() \
do { \
if (cpu_has_lsx) \
xor_speed(&xor_block_lsx); \
} while (0)
#else /* CONFIG_CPU_HAS_LSX */
#define XOR_SPEED_LSX()
#endif /* CONFIG_CPU_HAS_LSX */
#ifdef CONFIG_CPU_HAS_LASX
/* 256-bit vector (LASX) XOR template */
static struct xor_block_template xor_block_lasx = {
.name = "lasx",
.do_2 = xor_lasx_2,
.do_3 = xor_lasx_3,
.do_4 = xor_lasx_4,
.do_5 = xor_lasx_5,
};
/* Benchmark the LASX template only when the CPU actually has LASX */
#define XOR_SPEED_LASX() \
do { \
if (cpu_has_lasx) \
xor_speed(&xor_block_lasx); \
} while (0)
#else /* CONFIG_CPU_HAS_LASX */
#define XOR_SPEED_LASX()
#endif /* CONFIG_CPU_HAS_LASX */
/*
 * For grins, also test the generic routines.
 *
 * More importantly: it cannot be ruled out at this point of time, that some
 * future (maybe reduced) models could run the vector algorithms slower than
 * the scalar ones, maybe for errata or micro-op reasons. It may be
 * appropriate to revisit this after one or two more uarch generations.
 */
#include <asm-generic/xor.h>
#undef XOR_TRY_TEMPLATES
/* Benchmark generic scalar templates first, then the SIMD ones */
#define XOR_TRY_TEMPLATES \
do { \
xor_speed(&xor_block_8regs); \
xor_speed(&xor_block_8regs_p); \
xor_speed(&xor_block_32regs); \
xor_speed(&xor_block_32regs_p); \
XOR_SPEED_LSX(); \
XOR_SPEED_LASX(); \
} while (0)
#endif /* _ASM_LOONGARCH_XOR_H */
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Prototypes for the LSX/LASX SIMD XOR kernels.
 *
 * Each xor_*_N routine XORs N source buffers of 'bytes' length into p1
 * in place; the __restrict qualifiers assert the buffers do not alias.
 *
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 */
#ifndef _ASM_LOONGARCH_XOR_SIMD_H
#define _ASM_LOONGARCH_XOR_SIMD_H
#ifdef CONFIG_CPU_HAS_LSX
/* 128-bit vector implementations */
void xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2);
void xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2, const unsigned long * __restrict p3);
void xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2, const unsigned long * __restrict p3,
const unsigned long * __restrict p4);
void xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2, const unsigned long * __restrict p3,
const unsigned long * __restrict p4, const unsigned long * __restrict p5);
#endif /* CONFIG_CPU_HAS_LSX */
#ifdef CONFIG_CPU_HAS_LASX
/* 256-bit vector implementations */
void xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2);
void xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2, const unsigned long * __restrict p3);
void xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2, const unsigned long * __restrict p3,
const unsigned long * __restrict p4);
void xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2, const unsigned long * __restrict p3,
const unsigned long * __restrict p4, const unsigned long * __restrict p5);
#endif /* CONFIG_CPU_HAS_LASX */
#endif /* _ASM_LOONGARCH_XOR_SIMD_H */
......@@ -56,6 +56,12 @@ struct user_lasx_state {
uint64_t vregs[32*4];
};
struct user_lbt_state {
uint64_t scr[4];
uint32_t eflags;
uint32_t ftop;
};
struct user_watch_state {
uint64_t dbg_info;
struct {
......
......@@ -59,4 +59,14 @@ struct lasx_context {
__u32 fcsr;
};
/* LBT context */
#define LBT_CTX_MAGIC 0x42540001
#define LBT_CTX_ALIGN 8
struct lbt_context {
__u64 regs[4];
__u32 eflags;
__u32 ftop;
};
#endif /* _UAPI_ASM_SIGCONTEXT_H */
......@@ -15,6 +15,8 @@ obj-$(CONFIG_EFI) += efi.o
obj-$(CONFIG_CPU_HAS_FPU) += fpu.o kfpu.o
obj-$(CONFIG_CPU_HAS_LBT) += lbt.o
obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o
ifdef CONFIG_FUNCTION_TRACER
......@@ -32,6 +34,12 @@ ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_rethook_trampoline.o = $(CC_FLAGS_FTRACE)
endif
KASAN_SANITIZE_efi.o := n
KASAN_SANITIZE_cpu-probe.o := n
KASAN_SANITIZE_traps.o := n
KASAN_SANITIZE_smp.o := n
KASAN_SANITIZE_vdso.o := n
obj-$(CONFIG_MODULES) += module.o module-sections.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
......@@ -54,6 +62,7 @@ obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o
obj-$(CONFIG_UPROBES) += uprobes.o
......
......@@ -118,13 +118,6 @@ void output_thread_defines(void)
OFFSET(THREAD_CSRECFG, task_struct,
thread.csr_ecfg);
OFFSET(THREAD_SCR0, task_struct, thread.scr0);
OFFSET(THREAD_SCR1, task_struct, thread.scr1);
OFFSET(THREAD_SCR2, task_struct, thread.scr2);
OFFSET(THREAD_SCR3, task_struct, thread.scr3);
OFFSET(THREAD_EFLAGS, task_struct, thread.eflags);
OFFSET(THREAD_FPU, task_struct, thread.fpu);
OFFSET(THREAD_BVADDR, task_struct, \
......@@ -172,6 +165,17 @@ void output_thread_fpu_defines(void)
OFFSET(THREAD_FCSR, loongarch_fpu, fcsr);
OFFSET(THREAD_FCC, loongarch_fpu, fcc);
OFFSET(THREAD_FTOP, loongarch_fpu, ftop);
BLANK();
}
void output_thread_lbt_defines(void)
{
OFFSET(THREAD_SCR0, loongarch_lbt, scr0);
OFFSET(THREAD_SCR1, loongarch_lbt, scr1);
OFFSET(THREAD_SCR2, loongarch_lbt, scr2);
OFFSET(THREAD_SCR3, loongarch_lbt, scr3);
OFFSET(THREAD_EFLAGS, loongarch_lbt, eflags);
BLANK();
}
......
......@@ -144,6 +144,20 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
c->options |= LOONGARCH_CPU_LVZ;
elf_hwcap |= HWCAP_LOONGARCH_LVZ;
}
#ifdef CONFIG_CPU_HAS_LBT
if (config & CPUCFG2_X86BT) {
c->options |= LOONGARCH_CPU_LBT_X86;
elf_hwcap |= HWCAP_LOONGARCH_LBT_X86;
}
if (config & CPUCFG2_ARMBT) {
c->options |= LOONGARCH_CPU_LBT_ARM;
elf_hwcap |= HWCAP_LOONGARCH_LBT_ARM;
}
if (config & CPUCFG2_MIPSBT) {
c->options |= LOONGARCH_CPU_LBT_MIPS;
elf_hwcap |= HWCAP_LOONGARCH_LBT_MIPS;
}
#endif
config = read_cpucfg(LOONGARCH_CPUCFG6);
if (config & CPUCFG6_PMP)
......
......@@ -58,6 +58,11 @@ SYM_FUNC_START(handle_syscall)
SAVE_STATIC
#ifdef CONFIG_KGDB
li.w t1, CSR_CRMD_WE
csrxchg t1, t1, LOONGARCH_CSR_CRMD
#endif
move u0, t0
li.d tp, ~_THREAD_MASK
and tp, tp, sp
......
......@@ -22,7 +22,7 @@
.macro EX insn, reg, src, offs
.ex\@: \insn \reg, \src, \offs
_asm_extable .ex\@, fault
_asm_extable .ex\@, .L_fpu_fault
.endm
.macro sc_save_fp base
......@@ -138,6 +138,13 @@
.macro sc_save_fcsr base, tmp0
movfcsr2gr \tmp0, fcsr0
EX st.w \tmp0, \base, 0
#if defined(CONFIG_CPU_HAS_LBT)
/* TM bit is always 0 if LBT not supported */
andi \tmp0, \tmp0, FPU_CSR_TM
beqz \tmp0, 1f
x86clrtm
1:
#endif
.endm
.macro sc_restore_fcsr base, tmp0
......@@ -309,7 +316,7 @@ EXPORT_SYMBOL(_save_fp)
*/
SYM_FUNC_START(_restore_fp)
fpu_restore_double a0 t1 # clobbers t1
fpu_restore_csr a0 t1
fpu_restore_csr a0 t1 t2
fpu_restore_cc a0 t1 t2 # clobbers t1, t2
jr ra
SYM_FUNC_END(_restore_fp)
......@@ -514,7 +521,6 @@ SYM_FUNC_START(_restore_lasx_context)
jr ra
SYM_FUNC_END(_restore_lasx_context)
SYM_FUNC_START(fault)
.L_fpu_fault:
li.w a0, -EFAULT # failure
jr ra
SYM_FUNC_END(fault)
......@@ -95,12 +95,17 @@ SYM_CODE_START(kernel_entry) # kernel entry point
PTR_LI sp, (_THREAD_SIZE - PT_SIZE)
PTR_ADD sp, sp, tp
set_saved_sp sp, t0, t1
#endif
/* relocate_kernel() returns the new kernel entry point */
jr a0
ASM_BUG()
/* Jump to the new kernel: new_pc = current_pc + random_offset */
pcaddi t0, 0
add.d t0, t0, a0
jirl zero, t0, 0xc
#endif /* CONFIG_RANDOMIZE_BASE */
#endif /* CONFIG_RELOCATABLE */
#ifdef CONFIG_KASAN
bl kasan_early_init
#endif
bl start_kernel
......
......@@ -8,19 +8,40 @@
#include <asm/fpu.h>
#include <asm/smp.h>
static unsigned int euen_mask = CSR_EUEN_FPEN;
/*
* The critical section between kernel_fpu_begin() and kernel_fpu_end()
* is non-reentrant. It is the caller's responsibility to avoid reentrance.
* See drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c as an example.
*/
static DEFINE_PER_CPU(bool, in_kernel_fpu);
static DEFINE_PER_CPU(unsigned int, euen_current);
void kernel_fpu_begin(void)
{
unsigned int *euen_curr;
preempt_disable();
WARN_ON(this_cpu_read(in_kernel_fpu));
this_cpu_write(in_kernel_fpu, true);
euen_curr = this_cpu_ptr(&euen_current);
if (!is_fpu_owner())
enable_fpu();
*euen_curr = csr_xchg32(euen_mask, euen_mask, LOONGARCH_CSR_EUEN);
#ifdef CONFIG_CPU_HAS_LASX
if (*euen_curr & CSR_EUEN_LASXEN)
_save_lasx(&current->thread.fpu);
else
#endif
#ifdef CONFIG_CPU_HAS_LSX
if (*euen_curr & CSR_EUEN_LSXEN)
_save_lsx(&current->thread.fpu);
else
#endif
if (*euen_curr & CSR_EUEN_FPEN)
_save_fp(&current->thread.fpu);
write_fcsr(LOONGARCH_FCSR0, 0);
......@@ -29,15 +50,41 @@ EXPORT_SYMBOL_GPL(kernel_fpu_begin);
/*
 * Close a kernel_fpu_begin() section: restore whatever FP/SIMD user
 * context was live on entry (recorded in per-CPU euen_current) and
 * re-enable preemption.
 */
void kernel_fpu_end(void)
{
unsigned int *euen_curr;
/* Must be paired with a preceding kernel_fpu_begin() */
WARN_ON(!this_cpu_read(in_kernel_fpu));
/* NOTE(review): disabling the FPU before restoring registers looks
 * inconsistent with the restore below — verify against upstream; this
 * may be residue of the pre-patch code in the diff view. */
if (!is_fpu_owner())
disable_fpu();
euen_curr = this_cpu_ptr(&euen_current);
/* Restore the widest state that was enabled on entry: LASX > LSX > FP */
#ifdef CONFIG_CPU_HAS_LASX
if (*euen_curr & CSR_EUEN_LASXEN)
_restore_lasx(&current->thread.fpu);
else
#endif
#ifdef CONFIG_CPU_HAS_LSX
if (*euen_curr & CSR_EUEN_LSXEN)
_restore_lsx(&current->thread.fpu);
else
#endif
if (*euen_curr & CSR_EUEN_FPEN)
_restore_fp(&current->thread.fpu);
/* Put CSR.EUEN back to its pre-begin value */
*euen_curr = csr_xchg32(*euen_curr, euen_mask, LOONGARCH_CSR_EUEN);
this_cpu_write(in_kernel_fpu, false);
preempt_enable();
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);
/*
 * Extend euen_mask (initialized to CSR_EUEN_FPEN) with the vector-unit
 * enable bits actually present on this CPU, so kernel_fpu_begin() can
 * enable/disable everything in one CSR exchange.
 */
static int __init init_euen_mask(void)
{
if (cpu_has_lsx)
euen_mask |= CSR_EUEN_LSXEN;
if (cpu_has_lasx)
euen_mask |= CSR_EUEN_LASXEN;
return 0;
}
arch_initcall(init_euen_mask);
// SPDX-License-Identifier: GPL-2.0-only
/*
* LoongArch KGDB support
*
* Copyright (C) 2023 Loongson Technology Corporation Limited
*/
#include <linux/hw_breakpoint.h>
#include <linux/kdebug.h>
#include <linux/kgdb.h>
#include <linux/processor.h>
#include <linux/ptrace.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <asm/cacheflush.h>
#include <asm/fpu.h>
#include <asm/hw_breakpoint.h>
#include <asm/inst.h>
#include <asm/irq_regs.h>
#include <asm/ptrace.h>
#include <asm/sigcontext.h>
int kgdb_watch_activated;
static unsigned int stepped_opcode;
static unsigned long stepped_address;
/*
 * GDB register description table. For GPRs/orig_a0/pc/badv the third
 * field is a byte offset into struct pt_regs; for FP/fcc/fcsr entries
 * it is a register INDEX into current->thread.fpu (see dbg_get_reg()).
 */
struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
{ "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[0]) },
{ "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[1]) },
{ "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[2]) },
{ "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[3]) },
{ "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[4]) },
{ "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[5]) },
{ "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[6]) },
{ "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[7]) },
{ "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[8]) },
{ "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[9]) },
{ "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[10]) },
{ "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[11]) },
{ "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[12]) },
{ "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[13]) },
{ "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[14]) },
{ "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[15]) },
{ "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[16]) },
{ "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[17]) },
{ "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[18]) },
{ "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[19]) },
{ "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[20]) },
{ "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[21]) },
{ "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[22]) },
{ "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[23]) },
{ "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[24]) },
{ "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[25]) },
{ "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[26]) },
{ "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[27]) },
{ "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[28]) },
{ "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[29]) },
{ "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[30]) },
{ "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[31]) },
{ "orig_a0", GDB_SIZEOF_REG, offsetof(struct pt_regs, orig_a0) },
{ "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_era) },
{ "badv", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_badvaddr) },
/* FP registers: third field is the FPR index, not a byte offset */
{ "f0", GDB_SIZEOF_REG, 0 },
{ "f1", GDB_SIZEOF_REG, 1 },
{ "f2", GDB_SIZEOF_REG, 2 },
{ "f3", GDB_SIZEOF_REG, 3 },
{ "f4", GDB_SIZEOF_REG, 4 },
{ "f5", GDB_SIZEOF_REG, 5 },
{ "f6", GDB_SIZEOF_REG, 6 },
{ "f7", GDB_SIZEOF_REG, 7 },
{ "f8", GDB_SIZEOF_REG, 8 },
{ "f9", GDB_SIZEOF_REG, 9 },
{ "f10", GDB_SIZEOF_REG, 10 },
{ "f11", GDB_SIZEOF_REG, 11 },
{ "f12", GDB_SIZEOF_REG, 12 },
{ "f13", GDB_SIZEOF_REG, 13 },
{ "f14", GDB_SIZEOF_REG, 14 },
{ "f15", GDB_SIZEOF_REG, 15 },
{ "f16", GDB_SIZEOF_REG, 16 },
{ "f17", GDB_SIZEOF_REG, 17 },
{ "f18", GDB_SIZEOF_REG, 18 },
{ "f19", GDB_SIZEOF_REG, 19 },
{ "f20", GDB_SIZEOF_REG, 20 },
{ "f21", GDB_SIZEOF_REG, 21 },
{ "f22", GDB_SIZEOF_REG, 22 },
{ "f23", GDB_SIZEOF_REG, 23 },
{ "f24", GDB_SIZEOF_REG, 24 },
{ "f25", GDB_SIZEOF_REG, 25 },
{ "f26", GDB_SIZEOF_REG, 26 },
{ "f27", GDB_SIZEOF_REG, 27 },
{ "f28", GDB_SIZEOF_REG, 28 },
{ "f29", GDB_SIZEOF_REG, 29 },
{ "f30", GDB_SIZEOF_REG, 30 },
{ "f31", GDB_SIZEOF_REG, 31 },
/* Condition-code bits, 1 byte each; third field is the fcc index */
{ "fcc0", 1, 0 },
{ "fcc1", 1, 1 },
{ "fcc2", 1, 2 },
{ "fcc3", 1, 3 },
{ "fcc4", 1, 4 },
{ "fcc5", 1, 5 },
{ "fcc6", 1, 6 },
{ "fcc7", 1, 7 },
{ "fcsr", 4, 0 },
};
/*
 * Copy the value of register 'regno' into 'mem' for the GDB stub.
 * Returns the register's name, or NULL for an out-of-range regno.
 */
char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
{
int reg_offset, reg_size;
if (regno < 0 || regno >= DBG_MAX_REG_NUM)
return NULL;
reg_offset = dbg_reg_def[regno].offset;
reg_size = dbg_reg_def[regno].size;
/* offset == -1 marks a register with no backing storage */
if (reg_offset == -1)
goto out;
/* Handle general-purpose/orig_a0/pc/badv registers */
if (regno <= DBG_PT_REGS_END) {
memcpy(mem, (void *)regs + reg_offset, reg_size);
goto out;
}
/* FP state is only meaningful while the FPU is enabled for this context */
if (!(regs->csr_euen & CSR_EUEN_FPEN))
goto out;
/* Flush live FPU registers into current->thread.fpu before reading */
save_fp(current);
/* Handle FP registers */
switch (regno) {
case DBG_FCSR: /* Process the fcsr */
memcpy(mem, (void *)&current->thread.fpu.fcsr, reg_size);
break;
case DBG_FCC_BASE ... DBG_FCC_END: /* Process the fcc */
memcpy(mem, (void *)&current->thread.fpu.fcc + reg_offset, reg_size);
break;
case DBG_FPR_BASE ... DBG_FPR_END: /* Process the fpr */
memcpy(mem, (void *)&current->thread.fpu.fpr[reg_offset], reg_size);
break;
default:
break;
}
out:
return dbg_reg_def[regno].name;
}
/*
 * Write a register value supplied by the GDB stub back into the task
 * state: pt_regs for GPR/orig_a0/pc/badv, current->thread.fpu for FP
 * registers. Returns 0 on success, -EINVAL for an out-of-range regno.
 */
int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
{
	int offset, size;

	if (regno < 0 || regno >= DBG_MAX_REG_NUM)
		return -EINVAL;

	offset = dbg_reg_def[regno].offset;
	size = dbg_reg_def[regno].size;

	/* offset == -1 marks a register with no backing storage */
	if (offset == -1)
		return 0;

	if (regno <= DBG_PT_REGS_END) {
		/* GPRs/orig_a0/pc/badv live directly inside pt_regs */
		memcpy((void *)regs + offset, mem, size);
		return 0;
	}

	/* Ignore FP writes while the FPU is disabled for this context */
	if (!(regs->csr_euen & CSR_EUEN_FPEN))
		return 0;

	if (regno == DBG_FCSR)
		memcpy((void *)&current->thread.fpu.fcsr, mem, size);
	else if (regno >= DBG_FCC_BASE && regno <= DBG_FCC_END)
		memcpy((void *)&current->thread.fpu.fcc + offset, mem, size);
	else if (regno >= DBG_FPR_BASE && regno <= DBG_FPR_END)
		memcpy((void *)&current->thread.fpu.fpr[offset], mem, size);

	/* Reload the live FPU registers from the updated thread state */
	restore_fp(current);

	return 0;
}
/*
 * Similar to regs_to_gdb_regs() except that process is sleeping and so
 * we may not be able to get all the info.
 */
void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
{
/* Initialize to zero */
memset((char *)gdb_regs, 0, NUMREGBYTES);
/* Only the callee-saved state survives __switch_to() */
gdb_regs[DBG_LOONGARCH_RA] = p->thread.reg01;
gdb_regs[DBG_LOONGARCH_TP] = (long)p;
gdb_regs[DBG_LOONGARCH_SP] = p->thread.reg03;
/* S0 - S8 */
gdb_regs[DBG_LOONGARCH_S0] = p->thread.reg23;
gdb_regs[DBG_LOONGARCH_S1] = p->thread.reg24;
gdb_regs[DBG_LOONGARCH_S2] = p->thread.reg25;
gdb_regs[DBG_LOONGARCH_S3] = p->thread.reg26;
gdb_regs[DBG_LOONGARCH_S4] = p->thread.reg27;
gdb_regs[DBG_LOONGARCH_S5] = p->thread.reg28;
gdb_regs[DBG_LOONGARCH_S6] = p->thread.reg29;
gdb_regs[DBG_LOONGARCH_S7] = p->thread.reg30;
gdb_regs[DBG_LOONGARCH_S8] = p->thread.reg31;
/*
* PC use return address (RA), i.e. the moment after return from __switch_to()
*/
gdb_regs[DBG_LOONGARCH_PC] = p->thread.reg01;
}
/* Set the resume PC requested by the debugger (csr_era is the exception
 * return address). */
void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
{
regs->csr_era = pc;
}
/*
 * Compiled-in breakpoint used to drop into the debugger. The global
 * label kgdb_breakinst lets the trap handler recognize this site and
 * step over it (see kgdb_loongarch_notify()).
 */
void arch_kgdb_breakpoint(void)
{
__asm__ __volatile__ ( \
".globl kgdb_breakinst\n\t" \
"nop\n" \
"kgdb_breakinst:\tbreak 2\n\t"); /* BRK_KDB = 2 */
}
/*
 * Calls linux_debug_hook before the kernel dies. If KGDB is enabled,
 * then try to fall into the debugger
 */
static int kgdb_loongarch_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
{
struct die_args *args = (struct die_args *)ptr;
struct pt_regs *regs = args->regs;
/* Userspace events, ignore. */
if (user_mode(regs))
return NOTIFY_DONE;
if (!kgdb_io_module_registered)
return NOTIFY_DONE;
/* Another CPU is already in the debugger: report in as a slave */
if (atomic_read(&kgdb_active) != -1)
kgdb_nmicallback(smp_processor_id(), regs);
/* Non-zero return means KGDB declined the exception */
if (kgdb_handle_exception(args->trapnr, args->signr, cmd, regs))
return NOTIFY_DONE;
/* Skip the compiled-in break at kgdb_breakinst so we do not re-trap */
if (atomic_read(&kgdb_setting_breakpoint))
if (regs->csr_era == (unsigned long)&kgdb_breakinst)
regs->csr_era += LOONGARCH_INSN_SIZE;
return NOTIFY_STOP;
}
/*
 * Entry from the break-instruction trap path: package the trap as a
 * die_args and hand it to the KGDB notifier. Returns true when KGDB
 * consumed the exception (NOTIFY_STOP), false when the normal trap
 * handling should proceed.
 */
bool kgdb_breakpoint_handler(struct pt_regs *regs)
{
	struct die_args args = {
		.regs	= regs,
		.str	= "Break",
		.err	= BRK_KDB,
		.trapnr	= read_csr_excode(),
		.signr	= SIGTRAP,
	};

	/* The '== NOTIFY_STOP' comparison already yields a bool;
	 * the former '? true : false' was redundant. */
	return kgdb_loongarch_notify(NULL, DIE_TRAP, &args) == NOTIFY_STOP;
}
/* Die-notifier hook so panics/traps can fall into the debugger */
static struct notifier_block kgdb_notifier = {
.notifier_call = kgdb_loongarch_notify,
};
/*
 * Parse the optional address argument of a continue/step remote packet
 * and, when present, make it the resume PC.
 */
static inline void kgdb_arch_update_addr(struct pt_regs *regs,
					 char *remcom_in_buffer)
{
	char *cursor = remcom_in_buffer + 1;	/* skip the command byte */
	unsigned long new_pc;

	/* kgdb_hex2long() returns non-zero only when an address was parsed */
	if (kgdb_hex2long(&cursor, &new_pc))
		regs->csr_era = new_pc;
}
/* Calculate the new address for after a step */
static int get_step_address(struct pt_regs *regs, unsigned long *next_addr)
{
char cj_val;
unsigned int si, si_l, si_h, rd, rj, cj;
unsigned long pc = instruction_pointer(regs);
union loongarch_instruction *ip = (union loongarch_instruction *)pc;
/* Instructions are 4-byte aligned; a misaligned PC is corrupt state */
if (pc & 3) {
pr_warn("%s: invalid pc 0x%lx\n", __func__, pc);
return -EINVAL;
}
/* Default: fall through to the next sequential instruction */
*next_addr = pc + LOONGARCH_INSN_SIZE;
/* reg0i26 format: unconditional b/bl with a 26-bit offset (si_h:si_l) */
si_h = ip->reg0i26_format.immediate_h;
si_l = ip->reg0i26_format.immediate_l;
switch (ip->reg0i26_format.opcode) {
case b_op:
*next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27);
return 0;
case bl_op:
*next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27);
/* bl also writes the link register (r1/ra) */
regs->regs[1] = pc + LOONGARCH_INSN_SIZE;
return 0;
}
/* reg1i21 format: beqz/bnez on a GPR, bceqz/bcnez on an fcc bit */
rj = ip->reg1i21_format.rj;
cj = (rj & 0x07) + DBG_FCC_BASE;
si_l = ip->reg1i21_format.immediate_l;
si_h = ip->reg1i21_format.immediate_h;
/* Read the condition-code bit via the register table helper */
dbg_get_reg(cj, &cj_val, regs);
switch (ip->reg1i21_format.opcode) {
case beqz_op:
if (regs->regs[rj] == 0)
*next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
return 0;
case bnez_op:
if (regs->regs[rj] != 0)
*next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
return 0;
case bceqz_op: /* bceqz_op = bcnez_op */
/* Bits 3-4 of the rj field distinguish bceqz (0x00) from bcnez (0x08) */
if (((rj & 0x18) == 0x00) && !cj_val) /* bceqz */
*next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
if (((rj & 0x18) == 0x08) && cj_val) /* bcnez */
*next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
return 0;
}
/* reg2i16 format: two-register compare-and-branch, plus jirl */
rj = ip->reg2i16_format.rj;
rd = ip->reg2i16_format.rd;
si = ip->reg2i16_format.immediate;
switch (ip->reg2i16_format.opcode) {
case beq_op:
if (regs->regs[rj] == regs->regs[rd])
*next_addr = pc + sign_extend64(si << 2, 17);
return 0;
case bne_op:
if (regs->regs[rj] != regs->regs[rd])
*next_addr = pc + sign_extend64(si << 2, 17);
return 0;
case blt_op:
if ((long)regs->regs[rj] < (long)regs->regs[rd])
*next_addr = pc + sign_extend64(si << 2, 17);
return 0;
case bge_op:
if ((long)regs->regs[rj] >= (long)regs->regs[rd])
*next_addr = pc + sign_extend64(si << 2, 17);
return 0;
case bltu_op:
if (regs->regs[rj] < regs->regs[rd])
*next_addr = pc + sign_extend64(si << 2, 17);
return 0;
case bgeu_op:
if (regs->regs[rj] >= regs->regs[rd])
*next_addr = pc + sign_extend64(si << 2, 17);
return 0;
case jirl_op:
/* Indirect jump: link in rd, target is rj + offset */
regs->regs[rd] = pc + LOONGARCH_INSN_SIZE;
*next_addr = regs->regs[rj] + sign_extend64(si << 2, 17);
return 0;
}
return 0;
}
/*
 * Software single-step: compute where the current instruction goes,
 * save the opcode there and plant a break instruction in its place.
 * Undone by undo_single_step() on the next debugger entry.
 */
static int do_single_step(struct pt_regs *regs)
{
int error = 0;
unsigned long addr = 0; /* Determine where the target instruction will send us to */
error = get_step_address(regs, &addr);
if (error)
return error;
/* Store the opcode in the stepped address */
error = get_kernel_nofault(stepped_opcode, (void *)addr);
if (error)
return error;
stepped_address = addr;
/* Replace the opcode with the break instruction */
error = copy_to_kernel_nofault((void *)stepped_address,
arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
/* Make the modified text visible to the instruction fetch unit */
flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
if (error) {
/* Could not plant the breakpoint; forget the saved state */
stepped_opcode = 0;
stepped_address = 0;
} else {
kgdb_single_step = 1;
atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id());
}
return error;
}
/* Undo a single step */
static void undo_single_step(struct pt_regs *regs)
{
/* stepped_opcode != 0 means a break was planted by do_single_step() */
if (stepped_opcode) {
copy_to_kernel_nofault((void *)stepped_address,
(void *)&stepped_opcode, BREAK_INSTR_SIZE);
flush_icache_range(stepped_address, stepped_address + BREAK_INSTR_SIZE);
}
/* Clear all single-step bookkeeping */
stepped_opcode = 0;
stepped_address = 0;
kgdb_single_step = 0;
atomic_set(&kgdb_cpu_doing_single_step, -1);
}
/*
 * Handle a remote-protocol command from the debugger: detach/kill (D/k),
 * continue (c) or single-step (s). Returns 0 on success, negative when
 * the command is unsupported or stepping setup failed.
 */
int kgdb_arch_handle_exception(int vector, int signo, int err_code,
char *remcom_in_buffer, char *remcom_out_buffer,
struct pt_regs *regs)
{
int ret = 0;
/* Remove any breakpoint planted by a previous single step */
undo_single_step(regs);
/* Enable watchpoints on return while the debugger stays attached */
regs->csr_prmd |= CSR_PRMD_PWE;
switch (remcom_in_buffer[0]) {
case 'D':
case 'k':
/* Debugger detaching/killing: stop watching on return */
regs->csr_prmd &= ~CSR_PRMD_PWE;
fallthrough;
case 'c':
kgdb_arch_update_addr(regs, remcom_in_buffer);
break;
case 's':
kgdb_arch_update_addr(regs, remcom_in_buffer);
ret = do_single_step(regs);
break;
default:
ret = -1;
}
return ret;
}
/* Per-slot bookkeeping for KGDB hardware breakpoints/watchpoints;
 * 'pev' is the per-CPU perf event backing each slot. */
static struct hw_breakpoint {
unsigned int enabled;
unsigned long addr;
int len;
int type;
struct perf_event * __percpu *pev;
} breakinfo[LOONGARCH_MAX_BRP];
/*
 * Reserve a debug-core breakpoint slot for 'breakno' on every online
 * CPU; on failure, roll back the reservations already made.
 * Returns 0 on success, -1 on failure.
 */
static int hw_break_reserve_slot(int breakno)
{
int cpu, cnt = 0;
struct perf_event **pevent;
for_each_online_cpu(cpu) {
cnt++;
pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
if (dbg_reserve_bp_slot(*pevent))
goto fail;
}
return 0;
fail:
/* Release only the 'cnt - 1' CPUs that were successfully reserved */
for_each_online_cpu(cpu) {
cnt--;
if (!cnt)
break;
pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
dbg_release_bp_slot(*pevent);
}
return -1;
}
/*
 * Release the debug-core breakpoint slot for 'breakno' on every online
 * CPU. Returns 0 on success, -1 when any per-CPU release failed.
 */
static int hw_break_release_slot(int breakno)
{
int cpu;
struct perf_event **pevent;
/* Before debug-core init there is nothing to release */
if (dbg_is_early)
return 0;
for_each_online_cpu(cpu) {
pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
if (dbg_release_bp_slot(*pevent))
/*
* The debugger is responsible for handing the retry on
* remove failure.
*/
return -1;
}
return 0;
}
/*
 * arch_kgdb_ops.set_hw_breakpoint: record a hardware break/watchpoint
 * in a free breakinfo slot and reserve the underlying debug-core slots.
 * Returns 0 on success, -1 when no slot is free or a parameter is
 * unsupported. Installation happens later in kgdb_correct_hw_break().
 */
static int kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
{
int i;
/* Find the first unused slot */
for (i = 0; i < LOONGARCH_MAX_BRP; i++)
if (!breakinfo[i].enabled)
break;
if (i == LOONGARCH_MAX_BRP)
return -1;
/* Translate the KGDB breakpoint type to the perf hw-breakpoint type */
switch (bptype) {
case BP_HARDWARE_BREAKPOINT:
breakinfo[i].type = HW_BREAKPOINT_X;
break;
case BP_READ_WATCHPOINT:
breakinfo[i].type = HW_BREAKPOINT_R;
break;
case BP_WRITE_WATCHPOINT:
breakinfo[i].type = HW_BREAKPOINT_W;
break;
case BP_ACCESS_WATCHPOINT:
breakinfo[i].type = HW_BREAKPOINT_RW;
break;
default:
return -1;
}
/* Only power-of-two lengths up to 8 bytes are supported */
switch (len) {
case 1:
breakinfo[i].len = HW_BREAKPOINT_LEN_1;
break;
case 2:
breakinfo[i].len = HW_BREAKPOINT_LEN_2;
break;
case 4:
breakinfo[i].len = HW_BREAKPOINT_LEN_4;
break;
case 8:
breakinfo[i].len = HW_BREAKPOINT_LEN_8;
break;
default:
return -1;
}
breakinfo[i].addr = addr;
if (hw_break_reserve_slot(i)) {
breakinfo[i].addr = 0;
return -1;
}
breakinfo[i].enabled = 1;
return 0;
}
/*
 * arch_kgdb_ops.remove_hw_breakpoint: release the breakinfo slot whose
 * address matches 'addr'. Returns 0 on success, -1 when no enabled slot
 * matches or the debug-core slot could not be released.
 */
static int kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
{
	int slot = -1;
	int i;

	/* Locate the enabled slot registered for this address */
	for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
		if (breakinfo[i].enabled && breakinfo[i].addr == addr) {
			slot = i;
			break;
		}
	}

	if (slot < 0)
		return -1;

	if (hw_break_release_slot(slot)) {
		pr_err("Cannot remove hw breakpoint at %lx\n", addr);
		return -1;
	}

	breakinfo[slot].enabled = 0;
	return 0;
}
/*
 * Temporarily uninstall every active hardware breakpoint on this CPU
 * and clear CSR.CRMD.WE, so breakpoints do not fire while the kernel
 * is sitting inside KGDB itself.
 */
static void kgdb_disable_hw_break(struct pt_regs *regs)
{
int i;
int cpu = raw_smp_processor_id();
struct perf_event *bp;
for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
if (!breakinfo[i].enabled)
continue;
bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
/* Already disabled: nothing to uninstall */
if (bp->attr.disabled == 1)
continue;
arch_uninstall_hw_breakpoint(bp);
bp->attr.disabled = 1;
}
/* Disable hardware debugging while we are in kgdb */
csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
}
/*
 * arch_kgdb_ops.remove_all_hw_break: tear down every breakinfo slot —
 * uninstall still-armed events, release debug-core slots, and turn off
 * the watch-enable bit in CSR.CRMD.
 */
static void kgdb_remove_all_hw_break(void)
{
int i;
int cpu = raw_smp_processor_id();
struct perf_event *bp;
for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
if (!breakinfo[i].enabled)
continue;
bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
/* Still armed on this CPU: uninstall first, release on a later pass */
if (!bp->attr.disabled) {
arch_uninstall_hw_breakpoint(bp);
bp->attr.disabled = 1;
continue;
}
if (hw_break_release_slot(i))
pr_err("KGDB: hw bpt remove failed %lx\n", breakinfo[i].addr);
breakinfo[i].enabled = 0;
}
csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
kgdb_watch_activated = 0;
}
/*
 * arch_kgdb_ops.correct_hw_break: (re)install every enabled breakinfo
 * slot on this CPU before resuming, and set CSR.CRMD.WE when at least
 * one breakpoint is active.
 */
static void kgdb_correct_hw_break(void)
{
int i, activated = 0;
for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
struct perf_event *bp;
int val;
int cpu = raw_smp_processor_id();
if (!breakinfo[i].enabled)
continue;
bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
/* Only re-arm events that were previously disabled */
if (bp->attr.disabled != 1)
continue;
/* Copy the requested parameters into the perf event and validate */
bp->attr.bp_addr = breakinfo[i].addr;
bp->attr.bp_len = breakinfo[i].len;
bp->attr.bp_type = breakinfo[i].type;
val = hw_breakpoint_arch_parse(bp, &bp->attr, counter_arch_bp(bp));
if (val)
return;
val = arch_install_hw_breakpoint(bp);
if (!val)
bp->attr.disabled = 0;
activated = 1;
}
/* WE bit on only when at least one breakpoint was processed */
csr_xchg32(activated ? CSR_CRMD_WE : 0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
kgdb_watch_activated = activated;
}
/* Architecture hooks handed to the generic KGDB core */
const struct kgdb_arch arch_kgdb_ops = {
.gdb_bpt_instr = {0x02, 0x00, break_op >> 1, 0x00}, /* BRK_KDB = 2 */
.flags = KGDB_HW_BREAKPOINT,
.set_hw_breakpoint = kgdb_set_hw_break,
.remove_hw_breakpoint = kgdb_remove_hw_break,
.disable_hw_break = kgdb_disable_hw_break,
.remove_all_hw_break = kgdb_remove_all_hw_break,
.correct_hw_break = kgdb_correct_hw_break,
};
/* Hook KGDB into the die-notifier chain; called by the KGDB core */
int kgdb_arch_init(void)
{
return register_die_notifier(&kgdb_notifier);
}
/*
 * Late init: pre-allocate the per-CPU perf events backing each hw
 * breakpoint slot, then give their debug-core slots back so KGDB can
 * hand them out on demand (the events stay allocated but disarmed).
 */
void kgdb_arch_late(void)
{
int i, cpu;
struct perf_event_attr attr;
struct perf_event **pevent;
hw_breakpoint_init(&attr);
/* Placeholder attributes; real ones are set in kgdb_correct_hw_break() */
attr.bp_addr = (unsigned long)kgdb_arch_init;
attr.bp_len = HW_BREAKPOINT_LEN_4;
attr.bp_type = HW_BREAKPOINT_W;
attr.disabled = 1;
for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
if (breakinfo[i].pev)
continue;
breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL);
if (IS_ERR((void * __force)breakinfo[i].pev)) {
pr_err("kgdb: Could not allocate hw breakpoints.\n");
breakinfo[i].pev = NULL;
return;
}
/* Keep the event objects but release their debug slots for reuse */
for_each_online_cpu(cpu) {
pevent = per_cpu_ptr(breakinfo[i].pev, cpu);
if (pevent[0]->destroy) {
pevent[0]->destroy = NULL;
release_bp_slot(*pevent);
}
}
}
}
/*
 * Teardown: free the per-CPU hw-breakpoint perf events allocated by
 * kgdb_arch_late() and detach from the die-notifier chain.
 */
void kgdb_arch_exit(void)
{
	int slot;

	for (slot = 0; slot < LOONGARCH_MAX_BRP; slot++) {
		if (!breakinfo[slot].pev)
			continue;
		unregister_wide_hw_breakpoint(breakinfo[slot].pev);
		breakinfo[slot].pev = NULL;
	}

	unregister_die_notifier(&kgdb_notifier);
}
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Author: Qi Hu <huqi@loongson.cn>
* Huacai Chen <chenhuacai@loongson.cn>
*
* Copyright (C) 2020-2023 Loongson Technology Corporation Limited
*/
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/asm-extable.h>
#include <asm/asm-offsets.h>
#include <asm/errno.h>
#include <asm/regdef.h>
#define SCR_REG_WIDTH 8
/* Memory access with an extable entry: a fault lands in .L_lbt_fault */
.macro EX insn, reg, src, offs
.ex\@: \insn \reg, \src, \offs
_asm_extable .ex\@, .L_lbt_fault
.endm
/*
 * Save a thread's lbt context.
 * a0: pointer to struct loongarch_lbt (offsets THREAD_SCR0..THREAD_EFLAGS)
 */
SYM_FUNC_START(_save_lbt)
movscr2gr t1, $scr0 # save scr
stptr.d t1, a0, THREAD_SCR0
movscr2gr t1, $scr1
stptr.d t1, a0, THREAD_SCR1
movscr2gr t1, $scr2
stptr.d t1, a0, THREAD_SCR2
movscr2gr t1, $scr3
stptr.d t1, a0, THREAD_SCR3
x86mfflag t1, 0x3f # save eflags
stptr.d t1, a0, THREAD_EFLAGS
jr ra
SYM_FUNC_END(_save_lbt)
EXPORT_SYMBOL(_save_lbt)
/*
 * Restore a thread's lbt context.
 * a0: pointer to struct loongarch_lbt
 */
SYM_FUNC_START(_restore_lbt)
ldptr.d t1, a0, THREAD_SCR0 # restore scr
movgr2scr $scr0, t1
ldptr.d t1, a0, THREAD_SCR1
movgr2scr $scr1, t1
ldptr.d t1, a0, THREAD_SCR2
movgr2scr $scr2, t1
ldptr.d t1, a0, THREAD_SCR3
movgr2scr $scr3, t1
ldptr.d t1, a0, THREAD_EFLAGS # restore eflags
x86mtflag t1, 0x3f
jr ra
SYM_FUNC_END(_restore_lbt)
EXPORT_SYMBOL(_restore_lbt)
/*
 * Load scr/eflag with zero.
 * Gives a task a clean LBT state without touching memory.
 */
SYM_FUNC_START(_init_lbt)
movgr2scr $scr0, zero
movgr2scr $scr1, zero
movgr2scr $scr2, zero
movgr2scr $scr3, zero
x86mtflag zero, 0x3f
jr ra
SYM_FUNC_END(_init_lbt)
/*
 * Save scr0-scr3 and eflags to user memory (signal-frame layout).
 * a0: scr (4 x 8-byte slots)
 * a1: eflag (32-bit)
 * Returns 0 in a0 on success, -EFAULT via .L_lbt_fault on a bad pointer.
 */
SYM_FUNC_START(_save_lbt_context)
movscr2gr t1, $scr0 # save scr
EX st.d t1, a0, (0 * SCR_REG_WIDTH)
movscr2gr t1, $scr1
EX st.d t1, a0, (1 * SCR_REG_WIDTH)
movscr2gr t1, $scr2
EX st.d t1, a0, (2 * SCR_REG_WIDTH)
movscr2gr t1, $scr3
EX st.d t1, a0, (3 * SCR_REG_WIDTH)
x86mfflag t1, 0x3f # save eflags
EX st.w t1, a1, 0
li.w a0, 0 # success
jr ra
SYM_FUNC_END(_save_lbt_context)
/*
 * Restore scr0-scr3 and eflags from user memory (signal-frame layout).
 * a0: scr
 * a1: eflag
 * Returns 0 in a0 on success, -EFAULT via .L_lbt_fault on a bad pointer.
 */
SYM_FUNC_START(_restore_lbt_context)
EX ld.d t1, a0, (0 * SCR_REG_WIDTH) # restore scr
movgr2scr $scr0, t1
EX ld.d t1, a0, (1 * SCR_REG_WIDTH)
movgr2scr $scr1, t1
EX ld.d t1, a0, (2 * SCR_REG_WIDTH)
movgr2scr $scr2, t1
EX ld.d t1, a0, (3 * SCR_REG_WIDTH)
movgr2scr $scr3, t1
EX ld.w t1, a1, 0 # restore eflags
x86mtflag t1, 0x3f
li.w a0, 0 # success
jr ra
SYM_FUNC_END(_restore_lbt_context)
/*
 * Read the x86 FP top-of-stack pointer into *a0.
 * a0: ftop (32-bit store target)
 */
SYM_FUNC_START(_save_ftop_context)
x86mftop t1
st.w t1, a0, 0
li.w a0, 0 # success
jr ra
SYM_FUNC_END(_save_ftop_context)
/*
 * Load the x86 FP top-of-stack pointer from *a0.
 * x86mttop only takes an immediate, so dispatch through a jump table:
 * each entry at label 1 is 8 bytes (x86mttop + branch), indexed by
 * ftop & 7 via alsl.d (a0 + 8*t1).
 * a0: ftop
 */
SYM_FUNC_START(_restore_ftop_context)
ld.w t1, a0, 0
andi t1, t1, 0x7
la.pcrel a0, 1f
alsl.d a0, t1, a0, 3
jr a0
1:
x86mttop 0
b 2f
x86mttop 1
b 2f
x86mttop 2
b 2f
x86mttop 3
b 2f
x86mttop 4
b 2f
x86mttop 5
b 2f
x86mttop 6
b 2f
x86mttop 7
2:
li.w a0, 0 # success
jr ra
SYM_FUNC_END(_restore_ftop_context)
/* Common extable landing pad for the EX accessors above */
.L_lbt_fault:
li.w a0, -EFAULT # failure
jr ra
......@@ -67,39 +67,7 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
void __init pcpu_populate_pte(unsigned long addr)
{
pgd_t *pgd = pgd_offset_k(addr);
p4d_t *p4d = p4d_offset(pgd, addr);
pud_t *pud;
pmd_t *pmd;
if (p4d_none(*p4d)) {
pud_t *new;
new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
pgd_populate(&init_mm, pgd, new);
#ifndef __PAGETABLE_PUD_FOLDED
pud_init(new);
#endif
}
pud = pud_offset(p4d, addr);
if (pud_none(*pud)) {
pmd_t *new;
new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
pud_populate(&init_mm, pud, new);
#ifndef __PAGETABLE_PMD_FOLDED
pmd_init(new);
#endif
}
pmd = pmd_offset(pud, addr);
if (!pmd_present(*pmd)) {
pte_t *new;
new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
pmd_populate_kernel(&init_mm, pmd, new);
}
populate_kernel_pte(addr);
}
void __init setup_per_cpu_areas(void)
......@@ -470,7 +438,6 @@ void __init mem_init(void)
{
high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT);
memblock_free_all();
setup_zero_pages(); /* This comes from node 0 */
}
int pcibus_to_node(struct pci_bus *bus)
......
......@@ -38,6 +38,7 @@
#include <asm/cpu.h>
#include <asm/elf.h>
#include <asm/fpu.h>
#include <asm/lbt.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
......@@ -82,9 +83,11 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
euen = regs->csr_euen & ~(CSR_EUEN_FPEN);
regs->csr_euen = euen;
lose_fpu(0);
lose_lbt(0);
clear_thread_flag(TIF_LSX_CTX_LIVE);
clear_thread_flag(TIF_LASX_CTX_LIVE);
clear_thread_flag(TIF_LBT_CTX_LIVE);
clear_used_math();
regs->csr_era = pc;
regs->regs[3] = sp;
......@@ -121,10 +124,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
preempt_enable();
if (used_math())
memcpy(dst, src, sizeof(struct task_struct));
else
if (!used_math())
memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr));
else
memcpy(dst, src, offsetof(struct task_struct, thread.lbt.scr0));
#ifdef CONFIG_CPU_HAS_LBT
memcpy(&dst->thread.lbt, &src->thread.lbt, sizeof(struct loongarch_lbt));
#endif
return 0;
}
......@@ -189,8 +196,10 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
ptrace_hw_copy_thread(p);
clear_tsk_thread_flag(p, TIF_USEDFPU);
clear_tsk_thread_flag(p, TIF_USEDSIMD);
clear_tsk_thread_flag(p, TIF_USEDLBT);
clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE);
clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE);
clear_tsk_thread_flag(p, TIF_LBT_CTX_LIVE);
return 0;
}
......
......@@ -38,6 +38,7 @@
#include <asm/cpu.h>
#include <asm/cpu-info.h>
#include <asm/fpu.h>
#include <asm/lbt.h>
#include <asm/loongarch.h>
#include <asm/page.h>
#include <asm/pgtable.h>
......@@ -338,6 +339,46 @@ static int simd_set(struct task_struct *target,
#endif /* CONFIG_CPU_HAS_LSX */
#ifdef CONFIG_CPU_HAS_LBT
/*
 * ptrace regset "get" for the LBT state (NT_LOONGARCH_LBT): stream the
 * four 64-bit scratch registers, the 32-bit eflags, and the 32-bit FPU
 * ftop into the caller's membuf, in that fixed layout (matches the
 * offsets used by lbt_set below).
 *
 * NOTE(review): only the final membuf_write() result is kept; membuf_write
 * reports remaining buffer space rather than a per-call error, so the
 * intermediate return values are intentionally discarded — confirm
 * against the regset/membuf API.
 */
static int lbt_get(struct task_struct *target,
const struct user_regset *regset,
struct membuf to)
{
int r;
r = membuf_write(&to, &target->thread.lbt.scr0, sizeof(target->thread.lbt.scr0));
r = membuf_write(&to, &target->thread.lbt.scr1, sizeof(target->thread.lbt.scr1));
r = membuf_write(&to, &target->thread.lbt.scr2, sizeof(target->thread.lbt.scr2));
r = membuf_write(&to, &target->thread.lbt.scr3, sizeof(target->thread.lbt.scr3));
r = membuf_write(&to, &target->thread.lbt.eflags, sizeof(u32));
r = membuf_write(&to, &target->thread.fpu.ftop, sizeof(u32));
return r;
}
/*
 * ptrace regset "set" for the LBT state: mirror image of lbt_get.
 * Expected user layout: scr0..scr3 (4 x u64, offsets 0..31), then the
 * 32-bit eflags, then the 32-bit FPU ftop — 40 bytes total, matching
 * the regset description (.n = 5, .size = sizeof(u64)).
 * Returns 0 on success or a negative errno from user_regset_copyin().
 */
static int lbt_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
int err = 0;
/* eflags follows the four scratch registers; ftop follows eflags */
const int eflags_start = 4 * sizeof(target->thread.lbt.scr0);
const int ftop_start = eflags_start + sizeof(u32);
err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.lbt.scr0,
0, 4 * sizeof(target->thread.lbt.scr0));
err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.lbt.eflags,
eflags_start, ftop_start);
/* ftop is stored with the FPU state, not in thread.lbt */
err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.fpu.ftop,
ftop_start, ftop_start + sizeof(u32));
return err;
}
#endif /* CONFIG_CPU_HAS_LBT */
#ifdef CONFIG_HAVE_HW_BREAKPOINT
/*
......@@ -802,6 +843,9 @@ enum loongarch_regset {
#ifdef CONFIG_CPU_HAS_LASX
REGSET_LASX,
#endif
#ifdef CONFIG_CPU_HAS_LBT
REGSET_LBT,
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
REGSET_HW_BREAK,
REGSET_HW_WATCH,
......@@ -853,6 +897,16 @@ static const struct user_regset loongarch64_regsets[] = {
.set = simd_set,
},
#endif
#ifdef CONFIG_CPU_HAS_LBT
[REGSET_LBT] = {
.core_note_type = NT_LOONGARCH_LBT,
.n = 5,
.size = sizeof(u64),
.align = sizeof(u64),
.regset_get = lbt_get,
.set = lbt_set,
},
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
[REGSET_HW_BREAK] = {
.core_note_type = NT_LOONGARCH_HW_BREAK,
......
......@@ -157,12 +157,11 @@ static inline void __init update_reloc_offset(unsigned long *addr, long random_o
*new_addr = (unsigned long)reloc_offset;
}
void * __init relocate_kernel(void)
unsigned long __init relocate_kernel(void)
{
unsigned long kernel_length;
unsigned long random_offset = 0;
void *location_new = _text; /* Default to original kernel start */
void *kernel_entry = start_kernel; /* Default to original kernel entry point */
char *cmdline = early_ioremap(fw_arg1, COMMAND_LINE_SIZE); /* Boot command line is passed in fw_arg1 */
strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE);
......@@ -190,9 +189,6 @@ void * __init relocate_kernel(void)
reloc_offset += random_offset;
/* Return the new kernel's entry point */
kernel_entry = RELOCATED_KASLR(start_kernel);
/* The current thread is now within the relocated kernel */
__current_thread_info = RELOCATED_KASLR(__current_thread_info);
......@@ -204,7 +200,7 @@ void * __init relocate_kernel(void)
relocate_absolute(random_offset);
return kernel_entry;
return random_offset;
}
/*
......
......@@ -626,4 +626,8 @@ void __init setup_arch(char **cmdline_p)
#endif
paging_init();
#ifdef CONFIG_KASAN
kasan_init();
#endif
}
......@@ -32,6 +32,7 @@
#include <asm/cacheflush.h>
#include <asm/cpu-features.h>
#include <asm/fpu.h>
#include <asm/lbt.h>
#include <asm/ucontext.h>
#include <asm/vdso.h>
......@@ -44,6 +45,9 @@
/* Make sure we will not lose FPU ownership */
#define lock_fpu_owner() ({ preempt_disable(); pagefault_disable(); })
#define unlock_fpu_owner() ({ pagefault_enable(); preempt_enable(); })
/* Make sure we will not lose LBT ownership */
#define lock_lbt_owner() ({ preempt_disable(); pagefault_disable(); })
#define unlock_lbt_owner() ({ pagefault_enable(); preempt_enable(); })
/* Assembly functions to move context to/from the FPU */
extern asmlinkage int
......@@ -59,6 +63,13 @@ _save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
extern asmlinkage int
_restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
#ifdef CONFIG_CPU_HAS_LBT
extern asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags);
extern asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags);
extern asmlinkage int _save_ftop_context(void __user *ftop);
extern asmlinkage int _restore_ftop_context(void __user *ftop);
#endif
struct rt_sigframe {
struct siginfo rs_info;
struct ucontext rs_uctx;
......@@ -75,6 +86,7 @@ struct extctx_layout {
struct _ctx_layout fpu;
struct _ctx_layout lsx;
struct _ctx_layout lasx;
struct _ctx_layout lbt;
struct _ctx_layout end;
};
......@@ -215,6 +227,52 @@ static int copy_lasx_from_sigcontext(struct lasx_context __user *ctx)
return err;
}
#ifdef CONFIG_CPU_HAS_LBT
/*
 * Copy the software-saved LBT state (four scratch registers plus the
 * emulated x86 eflags) from current->thread into the user-space signal
 * context.  Returns 0 on success, non-zero if any __put_user() faulted.
 */
static int copy_lbt_to_sigcontext(struct lbt_context __user *ctx)
{
	uint64_t __user *regs = (uint64_t *)&ctx->regs;
	int ret;

	ret  = __put_user(current->thread.lbt.scr0, &regs[0]);
	ret |= __put_user(current->thread.lbt.scr1, &regs[1]);
	ret |= __put_user(current->thread.lbt.scr2, &regs[2]);
	ret |= __put_user(current->thread.lbt.scr3, &regs[3]);
	ret |= __put_user(current->thread.lbt.eflags, (uint32_t *)&ctx->eflags);

	return ret;
}
/*
 * Load the LBT state (scr0..scr3 and eflags) from the user-space signal
 * context back into current->thread.  Returns 0 on success, non-zero if
 * any __get_user() faulted.
 */
static int copy_lbt_from_sigcontext(struct lbt_context __user *ctx)
{
	uint64_t __user *regs = (uint64_t *)&ctx->regs;
	int ret;

	ret  = __get_user(current->thread.lbt.scr0, &regs[0]);
	ret |= __get_user(current->thread.lbt.scr1, &regs[1]);
	ret |= __get_user(current->thread.lbt.scr2, &regs[2]);
	ret |= __get_user(current->thread.lbt.scr3, &regs[3]);
	ret |= __get_user(current->thread.lbt.eflags, (uint32_t *)&ctx->eflags);

	return ret;
}
/* Save the x86 FPU top-of-stack pointer (kept in thread.fpu, not
 * thread.lbt) into the user signal context.  0 on success. */
static int copy_ftop_to_sigcontext(struct lbt_context __user *ctx)
{
	return __put_user(current->thread.fpu.ftop, &ctx->ftop);
}
/* Restore the x86 FPU top-of-stack pointer from the user signal context
 * into thread.fpu.  0 on success. */
static int copy_ftop_from_sigcontext(struct lbt_context __user *ctx)
{
	return __get_user(current->thread.fpu.ftop, &ctx->ftop);
}
#endif
/*
* Wrappers for the assembly _{save,restore}_fp_context functions.
*/
......@@ -272,6 +330,41 @@ static int restore_hw_lasx_context(struct lasx_context __user *ctx)
return _restore_lasx_context(regs, fcc, fcsr);
}
/*
* Wrappers for the assembly _{save,restore}_lbt_context functions.
*/
#ifdef CONFIG_CPU_HAS_LBT
/* Dump the live LBT hardware state straight into the user sigcontext
 * via the assembly helper; returns 0 or -EFAULT. */
static int save_hw_lbt_context(struct lbt_context __user *ctx)
{
	return _save_lbt_context((uint64_t *)&ctx->regs,
				 (uint32_t *)&ctx->eflags);
}
/* Load the LBT hardware state directly from the user sigcontext via the
 * assembly helper; returns 0 or -EFAULT. */
static int restore_hw_lbt_context(struct lbt_context __user *ctx)
{
	return _restore_lbt_context((uint64_t *)&ctx->regs,
				    (uint32_t *)&ctx->eflags);
}
/* Store the hardware x86 top-of-stack pointer into the user sigcontext. */
static int save_hw_ftop_context(struct lbt_context __user *ctx)
{
	return _save_ftop_context(&ctx->ftop);
}
/* Load the hardware x86 top-of-stack pointer from the user sigcontext. */
static int restore_hw_ftop_context(struct lbt_context __user *ctx)
{
	return _restore_ftop_context(&ctx->ftop);
}
#endif
static int fcsr_pending(unsigned int __user *fcsr)
{
int err, sig = 0;
......@@ -519,6 +612,77 @@ static int protected_restore_lasx_context(struct extctx_layout *extctx)
return err ?: sig;
}
#ifdef CONFIG_CPU_HAS_LBT
/*
 * Save the LBT extended context into the signal frame with pagefaults
 * disabled.  If the task currently owns the LBT unit (or the FPU, for
 * ftop) the state is dumped straight from hardware; otherwise the
 * software copy in current->thread is used.  Because pagefaults are
 * disabled inside the lock, a not-present user page makes the save fail
 * with -EFAULT: in that case we fault the page in by touching it
 * (outside the lock) and retry the whole sequence.
 * Returns 0 on success or -EFAULT if the frame is truly inaccessible.
 */
static int protected_save_lbt_context(struct extctx_layout *extctx)
{
int err = 0;
struct sctx_info __user *info = extctx->lbt.addr;
struct lbt_context __user *lbt_ctx =
(struct lbt_context *)get_ctx_through_ctxinfo(info);
uint64_t __user *regs = (uint64_t *)&lbt_ctx->regs;
uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags;
while (1) {
/* lock = preempt+pagefault disable: LBT ownership must not change */
lock_lbt_owner();
if (is_lbt_owner())
err |= save_hw_lbt_context(lbt_ctx);
else
err |= copy_lbt_to_sigcontext(lbt_ctx);
/* ftop lives with the FPU state, so its source depends on FPU ownership */
if (is_fpu_owner())
err |= save_hw_ftop_context(lbt_ctx);
else
err |= copy_ftop_to_sigcontext(lbt_ctx);
unlock_lbt_owner();
/* Tag the extended-context record so the restore path can find it */
err |= __put_user(LBT_CTX_MAGIC, &info->magic);
err |= __put_user(extctx->lbt.size, &info->size);
if (likely(!err))
break;
/* Touch the LBT context and try again */
err = __put_user(0, &regs[0]) | __put_user(0, eflags);
if (err)
return err;
}
return err;
}
/*
 * Restore the LBT extended context from the signal frame, mirror image
 * of protected_save_lbt_context(): hardware restore when the task owns
 * the LBT unit (or FPU, for ftop), software copy otherwise.  Pagefaults
 * are disabled while the owner lock is held, so on -EFAULT we fault the
 * user page in by reading it (outside the lock) and retry.
 * Returns 0 on success or -EFAULT if the frame is truly inaccessible.
 */
static int protected_restore_lbt_context(struct extctx_layout *extctx)
{
int err = 0, tmp __maybe_unused;
struct sctx_info __user *info = extctx->lbt.addr;
struct lbt_context __user *lbt_ctx =
(struct lbt_context *)get_ctx_through_ctxinfo(info);
uint64_t __user *regs = (uint64_t *)&lbt_ctx->regs;
uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags;
while (1) {
/* lock = preempt+pagefault disable: LBT ownership must not change */
lock_lbt_owner();
if (is_lbt_owner())
err |= restore_hw_lbt_context(lbt_ctx);
else
err |= copy_lbt_from_sigcontext(lbt_ctx);
/* ftop lives with the FPU state, so its source depends on FPU ownership */
if (is_fpu_owner())
err |= restore_hw_ftop_context(lbt_ctx);
else
err |= copy_ftop_from_sigcontext(lbt_ctx);
unlock_lbt_owner();
if (likely(!err))
break;
/* Touch the LBT context and try again */
err = __get_user(tmp, &regs[0]) | __get_user(tmp, eflags);
if (err)
return err;
}
return err;
}
#endif
static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
struct extctx_layout *extctx)
{
......@@ -539,6 +703,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
else if (extctx->fpu.addr)
err |= protected_save_fpu_context(extctx);
#ifdef CONFIG_CPU_HAS_LBT
if (extctx->lbt.addr)
err |= protected_save_lbt_context(extctx);
#endif
/* Set the "end" magic */
info = (struct sctx_info *)extctx->end.addr;
err |= __put_user(0, &info->magic);
......@@ -584,6 +753,13 @@ static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout *
extctx->lasx.addr = info;
break;
case LBT_CTX_MAGIC:
if (size < (sizeof(struct sctx_info) +
sizeof(struct lbt_context)))
goto invalid;
extctx->lbt.addr = info;
break;
default:
goto invalid;
}
......@@ -636,6 +812,11 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc
else if (extctx.fpu.addr)
err |= protected_restore_fpu_context(&extctx);
#ifdef CONFIG_CPU_HAS_LBT
if (extctx.lbt.addr)
err |= protected_restore_lbt_context(&extctx);
#endif
bad:
return err;
}
......@@ -700,6 +881,13 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon
sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp);
}
#ifdef CONFIG_CPU_HAS_LBT
if (cpu_has_lbt && thread_lbt_context_live()) {
new_sp = extframe_alloc(extctx, &extctx->lbt,
sizeof(struct lbt_context), LBT_CTX_ALIGN, new_sp);
}
#endif
return new_sp;
}
......
......@@ -18,17 +18,19 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
struct pt_regs dummyregs;
struct unwind_state state;
regs = &dummyregs;
if (!regs) {
regs = &dummyregs;
if (task == current) {
regs->regs[3] = (unsigned long)__builtin_frame_address(0);
regs->csr_era = (unsigned long)__builtin_return_address(0);
} else {
regs->regs[3] = thread_saved_fp(task);
regs->csr_era = thread_saved_ra(task);
if (task == current) {
regs->regs[3] = (unsigned long)__builtin_frame_address(0);
regs->csr_era = (unsigned long)__builtin_return_address(0);
} else {
regs->regs[3] = thread_saved_fp(task);
regs->csr_era = thread_saved_ra(task);
}
regs->regs[1] = 0;
}
regs->regs[1] = 0;
for (unwind_start(&state, task, regs);
!unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
......
......@@ -36,7 +36,9 @@
#include <asm/break.h>
#include <asm/cpu.h>
#include <asm/fpu.h>
#include <asm/lbt.h>
#include <asm/inst.h>
#include <asm/kgdb.h>
#include <asm/loongarch.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
......@@ -702,6 +704,11 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
* pertain to them.
*/
switch (bcode) {
case BRK_KDB:
if (kgdb_breakpoint_handler(regs))
goto out;
else
break;
case BRK_KPROBE_BP:
if (kprobe_breakpoint_handler(regs))
goto out;
......@@ -768,6 +775,9 @@ asmlinkage void noinstr do_watch(struct pt_regs *regs)
#ifndef CONFIG_HAVE_HW_BREAKPOINT
pr_warn("Hardware watch point handler not implemented!\n");
#else
if (kgdb_breakpoint_handler(regs))
goto out;
if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) {
int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1);
unsigned long pc = instruction_pointer(regs);
......@@ -966,13 +976,47 @@ asmlinkage void noinstr do_lasx(struct pt_regs *regs)
irqentry_exit(regs, state);
}
/*
 * Make the current task the owner of an enabled LBT unit.  A task with
 * live saved LBT state gets it reloaded onto the hardware (unless it is
 * already the owner); a first-time user starts from freshly-initialized
 * (zeroed) state and is marked TIF_LBT_CTX_LIVE.  Called with
 * preemption disabled (see the do_lbt() call site).
 */
static void init_restore_lbt(void)
{
	if (thread_lbt_context_live()) {
		/* Saved state exists: reload it if we lost the unit */
		if (!is_lbt_owner())
			own_lbt_inatomic(1);
	} else {
		/* First time LBT context user */
		init_lbt();
		set_thread_flag(TIF_LBT_CTX_LIVE);
	}

	BUG_ON(!is_lbt_enabled());
}
asmlinkage void noinstr do_lbt(struct pt_regs *regs)
{
irqentry_state_t state = irqentry_enter(regs);
local_irq_enable();
force_sig(SIGILL);
local_irq_disable();
/*
* BTD (Binary Translation Disable exception) can be triggered
* during FP save/restore if TM (Top Mode) is on, which may
* cause irq_enable during 'switch_to'. To avoid this situation
* (including the user using 'MOVGR2GCSR' to turn on TM, which
* will not trigger the BTE), we need to check PRMD first.
*/
if (regs->csr_prmd & CSR_PRMD_PIE)
local_irq_enable();
if (!cpu_has_lbt) {
force_sig(SIGILL);
goto out;
}
BUG_ON(is_lbt_enabled());
preempt_disable();
init_restore_lbt();
preempt_enable();
out:
if (regs->csr_prmd & CSR_PRMD_PIE)
local_irq_disable();
irqentry_exit(regs, state);
}
......
......@@ -6,4 +6,6 @@
lib-y += delay.o memset.o memcpy.o memmove.o \
clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
......@@ -11,19 +11,6 @@
#include <asm/cpu.h>
#include <asm/regdef.h>
.irp to, 0, 1, 2, 3, 4, 5, 6, 7
.L_fixup_handle_\to\():
sub.d a0, a2, a0
addi.d a0, a0, (\to) * (-8)
jr ra
.endr
.irp to, 0, 2, 4
.L_fixup_handle_s\to\():
addi.d a0, a1, -\to
jr ra
.endr
SYM_FUNC_START(__clear_user)
/*
* Some CPUs support hardware unaligned access
......@@ -51,7 +38,7 @@ SYM_FUNC_START(__clear_user_generic)
2: move a0, a1
jr ra
_asm_extable 1b, .L_fixup_handle_s0
_asm_extable 1b, 2b
SYM_FUNC_END(__clear_user_generic)
/*
......@@ -173,33 +160,47 @@ SYM_FUNC_START(__clear_user_fast)
jr ra
/* fixup and ex_table */
_asm_extable 0b, .L_fixup_handle_0
_asm_extable 1b, .L_fixup_handle_0
_asm_extable 2b, .L_fixup_handle_1
_asm_extable 3b, .L_fixup_handle_2
_asm_extable 4b, .L_fixup_handle_3
_asm_extable 5b, .L_fixup_handle_4
_asm_extable 6b, .L_fixup_handle_5
_asm_extable 7b, .L_fixup_handle_6
_asm_extable 8b, .L_fixup_handle_7
_asm_extable 9b, .L_fixup_handle_0
_asm_extable 10b, .L_fixup_handle_1
_asm_extable 11b, .L_fixup_handle_2
_asm_extable 12b, .L_fixup_handle_3
_asm_extable 13b, .L_fixup_handle_0
_asm_extable 14b, .L_fixup_handle_1
_asm_extable 15b, .L_fixup_handle_0
_asm_extable 16b, .L_fixup_handle_0
_asm_extable 17b, .L_fixup_handle_s0
_asm_extable 18b, .L_fixup_handle_s0
_asm_extable 19b, .L_fixup_handle_s0
_asm_extable 20b, .L_fixup_handle_s2
_asm_extable 21b, .L_fixup_handle_s0
_asm_extable 22b, .L_fixup_handle_s0
_asm_extable 23b, .L_fixup_handle_s4
_asm_extable 24b, .L_fixup_handle_s0
_asm_extable 25b, .L_fixup_handle_s4
_asm_extable 26b, .L_fixup_handle_s0
_asm_extable 27b, .L_fixup_handle_s4
_asm_extable 28b, .L_fixup_handle_s0
.Llarge_fixup:
sub.d a1, a2, a0
.Lsmall_fixup:
29: st.b zero, a0, 0
addi.d a0, a0, 1
addi.d a1, a1, -1
bgt a1, zero, 29b
.Lexit:
move a0, a1
jr ra
_asm_extable 0b, .Lsmall_fixup
_asm_extable 1b, .Llarge_fixup
_asm_extable 2b, .Llarge_fixup
_asm_extable 3b, .Llarge_fixup
_asm_extable 4b, .Llarge_fixup
_asm_extable 5b, .Llarge_fixup
_asm_extable 6b, .Llarge_fixup
_asm_extable 7b, .Llarge_fixup
_asm_extable 8b, .Llarge_fixup
_asm_extable 9b, .Llarge_fixup
_asm_extable 10b, .Llarge_fixup
_asm_extable 11b, .Llarge_fixup
_asm_extable 12b, .Llarge_fixup
_asm_extable 13b, .Llarge_fixup
_asm_extable 14b, .Llarge_fixup
_asm_extable 15b, .Llarge_fixup
_asm_extable 16b, .Llarge_fixup
_asm_extable 17b, .Lexit
_asm_extable 18b, .Lsmall_fixup
_asm_extable 19b, .Lsmall_fixup
_asm_extable 20b, .Lsmall_fixup
_asm_extable 21b, .Lsmall_fixup
_asm_extable 22b, .Lsmall_fixup
_asm_extable 23b, .Lsmall_fixup
_asm_extable 24b, .Lsmall_fixup
_asm_extable 25b, .Lsmall_fixup
_asm_extable 26b, .Lsmall_fixup
_asm_extable 27b, .Lsmall_fixup
_asm_extable 28b, .Lsmall_fixup
_asm_extable 29b, .Lexit
SYM_FUNC_END(__clear_user_fast)
......@@ -11,19 +11,6 @@
#include <asm/cpu.h>
#include <asm/regdef.h>
.irp to, 0, 1, 2, 3, 4, 5, 6, 7
.L_fixup_handle_\to\():
sub.d a0, a2, a0
addi.d a0, a0, (\to) * (-8)
jr ra
.endr
.irp to, 0, 2, 4
.L_fixup_handle_s\to\():
addi.d a0, a2, -\to
jr ra
.endr
SYM_FUNC_START(__copy_user)
/*
* Some CPUs support hardware unaligned access
......@@ -54,8 +41,8 @@ SYM_FUNC_START(__copy_user_generic)
3: move a0, a2
jr ra
_asm_extable 1b, .L_fixup_handle_s0
_asm_extable 2b, .L_fixup_handle_s0
_asm_extable 1b, 3b
_asm_extable 2b, 3b
SYM_FUNC_END(__copy_user_generic)
/*
......@@ -69,10 +56,10 @@ SYM_FUNC_START(__copy_user_fast)
sltui t0, a2, 9
bnez t0, .Lsmall
add.d a3, a1, a2
add.d a2, a0, a2
0: ld.d t0, a1, 0
1: st.d t0, a0, 0
add.d a3, a1, a2
add.d a2, a0, a2
/* align up destination address */
andi t1, a0, 7
......@@ -94,7 +81,6 @@ SYM_FUNC_START(__copy_user_fast)
7: ld.d t5, a1, 40
8: ld.d t6, a1, 48
9: ld.d t7, a1, 56
addi.d a1, a1, 64
10: st.d t0, a0, 0
11: st.d t1, a0, 8
12: st.d t2, a0, 16
......@@ -103,6 +89,7 @@ SYM_FUNC_START(__copy_user_fast)
15: st.d t5, a0, 40
16: st.d t6, a0, 48
17: st.d t7, a0, 56
addi.d a1, a1, 64
addi.d a0, a0, 64
bltu a1, a4, .Lloop64
......@@ -114,11 +101,11 @@ SYM_FUNC_START(__copy_user_fast)
19: ld.d t1, a1, 8
20: ld.d t2, a1, 16
21: ld.d t3, a1, 24
addi.d a1, a1, 32
22: st.d t0, a0, 0
23: st.d t1, a0, 8
24: st.d t2, a0, 16
25: st.d t3, a0, 24
addi.d a1, a1, 32
addi.d a0, a0, 32
.Llt32:
......@@ -126,9 +113,9 @@ SYM_FUNC_START(__copy_user_fast)
bgeu a1, a4, .Llt16
26: ld.d t0, a1, 0
27: ld.d t1, a1, 8
addi.d a1, a1, 16
28: st.d t0, a0, 0
29: st.d t1, a0, 8
addi.d a1, a1, 16
addi.d a0, a0, 16
.Llt16:
......@@ -136,6 +123,7 @@ SYM_FUNC_START(__copy_user_fast)
bgeu a1, a4, .Llt8
30: ld.d t0, a1, 0
31: st.d t0, a0, 0
addi.d a1, a1, 8
addi.d a0, a0, 8
.Llt8:
......@@ -214,62 +202,79 @@ SYM_FUNC_START(__copy_user_fast)
jr ra
/* fixup and ex_table */
_asm_extable 0b, .L_fixup_handle_0
_asm_extable 1b, .L_fixup_handle_0
_asm_extable 2b, .L_fixup_handle_0
_asm_extable 3b, .L_fixup_handle_0
_asm_extable 4b, .L_fixup_handle_0
_asm_extable 5b, .L_fixup_handle_0
_asm_extable 6b, .L_fixup_handle_0
_asm_extable 7b, .L_fixup_handle_0
_asm_extable 8b, .L_fixup_handle_0
_asm_extable 9b, .L_fixup_handle_0
_asm_extable 10b, .L_fixup_handle_0
_asm_extable 11b, .L_fixup_handle_1
_asm_extable 12b, .L_fixup_handle_2
_asm_extable 13b, .L_fixup_handle_3
_asm_extable 14b, .L_fixup_handle_4
_asm_extable 15b, .L_fixup_handle_5
_asm_extable 16b, .L_fixup_handle_6
_asm_extable 17b, .L_fixup_handle_7
_asm_extable 18b, .L_fixup_handle_0
_asm_extable 19b, .L_fixup_handle_0
_asm_extable 20b, .L_fixup_handle_0
_asm_extable 21b, .L_fixup_handle_0
_asm_extable 22b, .L_fixup_handle_0
_asm_extable 23b, .L_fixup_handle_1
_asm_extable 24b, .L_fixup_handle_2
_asm_extable 25b, .L_fixup_handle_3
_asm_extable 26b, .L_fixup_handle_0
_asm_extable 27b, .L_fixup_handle_0
_asm_extable 28b, .L_fixup_handle_0
_asm_extable 29b, .L_fixup_handle_1
_asm_extable 30b, .L_fixup_handle_0
_asm_extable 31b, .L_fixup_handle_0
_asm_extable 32b, .L_fixup_handle_0
_asm_extable 33b, .L_fixup_handle_0
_asm_extable 34b, .L_fixup_handle_s0
_asm_extable 35b, .L_fixup_handle_s0
_asm_extable 36b, .L_fixup_handle_s0
_asm_extable 37b, .L_fixup_handle_s0
_asm_extable 38b, .L_fixup_handle_s0
_asm_extable 39b, .L_fixup_handle_s0
_asm_extable 40b, .L_fixup_handle_s0
_asm_extable 41b, .L_fixup_handle_s2
_asm_extable 42b, .L_fixup_handle_s0
_asm_extable 43b, .L_fixup_handle_s0
_asm_extable 44b, .L_fixup_handle_s0
_asm_extable 45b, .L_fixup_handle_s0
_asm_extable 46b, .L_fixup_handle_s0
_asm_extable 47b, .L_fixup_handle_s4
_asm_extable 48b, .L_fixup_handle_s0
_asm_extable 49b, .L_fixup_handle_s0
_asm_extable 50b, .L_fixup_handle_s0
_asm_extable 51b, .L_fixup_handle_s4
_asm_extable 52b, .L_fixup_handle_s0
_asm_extable 53b, .L_fixup_handle_s0
_asm_extable 54b, .L_fixup_handle_s0
_asm_extable 55b, .L_fixup_handle_s4
_asm_extable 56b, .L_fixup_handle_s0
_asm_extable 57b, .L_fixup_handle_s0
.Llarge_fixup:
sub.d a2, a2, a0
.Lsmall_fixup:
58: ld.b t0, a1, 0
59: st.b t0, a0, 0
addi.d a0, a0, 1
addi.d a1, a1, 1
addi.d a2, a2, -1
bgt a2, zero, 58b
.Lexit:
move a0, a2
jr ra
_asm_extable 0b, .Lsmall_fixup
_asm_extable 1b, .Lsmall_fixup
_asm_extable 2b, .Llarge_fixup
_asm_extable 3b, .Llarge_fixup
_asm_extable 4b, .Llarge_fixup
_asm_extable 5b, .Llarge_fixup
_asm_extable 6b, .Llarge_fixup
_asm_extable 7b, .Llarge_fixup
_asm_extable 8b, .Llarge_fixup
_asm_extable 9b, .Llarge_fixup
_asm_extable 10b, .Llarge_fixup
_asm_extable 11b, .Llarge_fixup
_asm_extable 12b, .Llarge_fixup
_asm_extable 13b, .Llarge_fixup
_asm_extable 14b, .Llarge_fixup
_asm_extable 15b, .Llarge_fixup
_asm_extable 16b, .Llarge_fixup
_asm_extable 17b, .Llarge_fixup
_asm_extable 18b, .Llarge_fixup
_asm_extable 19b, .Llarge_fixup
_asm_extable 20b, .Llarge_fixup
_asm_extable 21b, .Llarge_fixup
_asm_extable 22b, .Llarge_fixup
_asm_extable 23b, .Llarge_fixup
_asm_extable 24b, .Llarge_fixup
_asm_extable 25b, .Llarge_fixup
_asm_extable 26b, .Llarge_fixup
_asm_extable 27b, .Llarge_fixup
_asm_extable 28b, .Llarge_fixup
_asm_extable 29b, .Llarge_fixup
_asm_extable 30b, .Llarge_fixup
_asm_extable 31b, .Llarge_fixup
_asm_extable 32b, .Llarge_fixup
_asm_extable 33b, .Llarge_fixup
_asm_extable 34b, .Lexit
_asm_extable 35b, .Lexit
_asm_extable 36b, .Lsmall_fixup
_asm_extable 37b, .Lsmall_fixup
_asm_extable 38b, .Lsmall_fixup
_asm_extable 39b, .Lsmall_fixup
_asm_extable 40b, .Lsmall_fixup
_asm_extable 41b, .Lsmall_fixup
_asm_extable 42b, .Lsmall_fixup
_asm_extable 43b, .Lsmall_fixup
_asm_extable 44b, .Lsmall_fixup
_asm_extable 45b, .Lsmall_fixup
_asm_extable 46b, .Lsmall_fixup
_asm_extable 47b, .Lsmall_fixup
_asm_extable 48b, .Lsmall_fixup
_asm_extable 49b, .Lsmall_fixup
_asm_extable 50b, .Lsmall_fixup
_asm_extable 51b, .Lsmall_fixup
_asm_extable 52b, .Lsmall_fixup
_asm_extable 53b, .Lsmall_fixup
_asm_extable 54b, .Lsmall_fixup
_asm_extable 55b, .Lsmall_fixup
_asm_extable 56b, .Lsmall_fixup
_asm_extable 57b, .Lsmall_fixup
_asm_extable 58b, .Lexit
_asm_extable 59b, .Lexit
SYM_FUNC_END(__copy_user_fast)
......@@ -10,6 +10,8 @@
#include <asm/cpu.h>
#include <asm/regdef.h>
.section .noinstr.text, "ax"
SYM_FUNC_START(memcpy)
/*
* Some CPUs support hardware unaligned access
......@@ -17,9 +19,13 @@ SYM_FUNC_START(memcpy)
ALTERNATIVE "b __memcpy_generic", \
"b __memcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memcpy)
_ASM_NOKPROBE(memcpy)
SYM_FUNC_ALIAS(__memcpy, memcpy)
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)
_ASM_NOKPROBE(memcpy)
_ASM_NOKPROBE(__memcpy)
/*
* void *__memcpy_generic(void *dst, const void *src, size_t n)
......
......@@ -10,23 +10,29 @@
#include <asm/cpu.h>
#include <asm/regdef.h>
.section .noinstr.text, "ax"
SYM_FUNC_START(memmove)
blt a0, a1, memcpy /* dst < src, memcpy */
blt a1, a0, rmemcpy /* src < dst, rmemcpy */
jr ra /* dst == src, return */
blt a0, a1, __memcpy /* dst < src, memcpy */
blt a1, a0, __rmemcpy /* src < dst, rmemcpy */
jr ra /* dst == src, return */
SYM_FUNC_END(memmove)
_ASM_NOKPROBE(memmove)
SYM_FUNC_ALIAS(__memmove, memmove)
EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL(__memmove)
_ASM_NOKPROBE(memmove)
_ASM_NOKPROBE(__memmove)
SYM_FUNC_START(rmemcpy)
SYM_FUNC_START(__rmemcpy)
/*
* Some CPUs support hardware unaligned access
*/
ALTERNATIVE "b __rmemcpy_generic", \
"b __rmemcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(rmemcpy)
_ASM_NOKPROBE(rmemcpy)
SYM_FUNC_END(__rmemcpy)
_ASM_NOKPROBE(__rmemcpy)
/*
* void *__rmemcpy_generic(void *dst, const void *src, size_t n)
......
......@@ -16,6 +16,8 @@
bstrins.d \r0, \r0, 63, 32
.endm
.section .noinstr.text, "ax"
SYM_FUNC_START(memset)
/*
* Some CPUs support hardware unaligned access
......@@ -23,9 +25,13 @@ SYM_FUNC_START(memset)
ALTERNATIVE "b __memset_generic", \
"b __memset_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memset)
_ASM_NOKPROBE(memset)
SYM_FUNC_ALIAS(__memset, memset)
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)
_ASM_NOKPROBE(memset)
_ASM_NOKPROBE(__memset)
/*
* void *__memset_generic(void *s, int c, size_t n)
......
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* LoongArch SIMD XOR operations
*
* Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
*/
#include "xor_simd.h"
/*
* Process one cache line (64 bytes) per loop. This is assuming all future
* popular LoongArch cores are similar performance-characteristics-wise to the
* current models.
*/
#define LINE_WIDTH 64
#ifdef CONFIG_CPU_HAS_LSX
#define LD(reg, base, offset) \
"vld $vr" #reg ", %[" #base "], " #offset "\n\t"
#define ST(reg, base, offset) \
"vst $vr" #reg ", %[" #base "], " #offset "\n\t"
#define XOR(dj, k) "vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t"
#define LD_INOUT_LINE(base) \
LD(0, base, 0) \
LD(1, base, 16) \
LD(2, base, 32) \
LD(3, base, 48)
#define LD_AND_XOR_LINE(base) \
LD(4, base, 0) \
LD(5, base, 16) \
LD(6, base, 32) \
LD(7, base, 48) \
XOR(0, 4) \
XOR(1, 5) \
XOR(2, 6) \
XOR(3, 7)
#define ST_LINE(base) \
ST(0, base, 0) \
ST(1, base, 16) \
ST(2, base, 32) \
ST(3, base, 48)
#define XOR_FUNC_NAME(nr) __xor_lsx_##nr
#include "xor_template.c"
#undef LD
#undef ST
#undef XOR
#undef LD_INOUT_LINE
#undef LD_AND_XOR_LINE
#undef ST_LINE
#undef XOR_FUNC_NAME
#endif /* CONFIG_CPU_HAS_LSX */
#ifdef CONFIG_CPU_HAS_LASX
#define LD(reg, base, offset) \
"xvld $xr" #reg ", %[" #base "], " #offset "\n\t"
#define ST(reg, base, offset) \
"xvst $xr" #reg ", %[" #base "], " #offset "\n\t"
#define XOR(dj, k) "xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t"
#define LD_INOUT_LINE(base) \
LD(0, base, 0) \
LD(1, base, 32)
#define LD_AND_XOR_LINE(base) \
LD(2, base, 0) \
LD(3, base, 32) \
XOR(0, 2) \
XOR(1, 3)
#define ST_LINE(base) \
ST(0, base, 0) \
ST(1, base, 32)
#define XOR_FUNC_NAME(nr) __xor_lasx_##nr
#include "xor_template.c"
#undef LD
#undef ST
#undef XOR
#undef LD_INOUT_LINE
#undef LD_AND_XOR_LINE
#undef ST_LINE
#undef XOR_FUNC_NAME
#endif /* CONFIG_CPU_HAS_LASX */
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Simple interface to link xor_simd.c and xor_simd_glue.c
*
* Separating these files ensures that no SIMD instructions are run outside of
* the kfpu critical section.
*/
#ifndef __LOONGARCH_LIB_XOR_SIMD_H
#define __LOONGARCH_LIB_XOR_SIMD_H
/*
 * Raw SIMD XOR kernels instantiated from xor_template.c.  Each
 * __xor_*_N() XORs buffers p2..pN into p1, 'bytes' bytes total.
 * These run SIMD instructions directly, so callers must hold the
 * kernel-FPU section open (see xor_simd_glue.c wrappers).
 */
#ifdef CONFIG_CPU_HAS_LSX
/* 128-bit LSX variants */
void __xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2);
void __xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2, const unsigned long * __restrict p3);
void __xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2, const unsigned long * __restrict p3,
const unsigned long * __restrict p4);
void __xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2, const unsigned long * __restrict p3,
const unsigned long * __restrict p4, const unsigned long * __restrict p5);
#endif /* CONFIG_CPU_HAS_LSX */
#ifdef CONFIG_CPU_HAS_LASX
/* 256-bit LASX variants */
void __xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2);
void __xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2, const unsigned long * __restrict p3);
void __xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2, const unsigned long * __restrict p3,
const unsigned long * __restrict p4);
void __xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2, const unsigned long * __restrict p3,
const unsigned long * __restrict p4, const unsigned long * __restrict p5);
#endif /* CONFIG_CPU_HAS_LASX */
#endif /* __LOONGARCH_LIB_XOR_SIMD_H */
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* LoongArch SIMD XOR operations
*
* Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
*/
#include <linux/export.h>
#include <linux/sched.h>
#include <asm/fpu.h>
#include <asm/xor_simd.h>
#include "xor_simd.h"
#define MAKE_XOR_GLUE_2(flavor) \
void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1, \
const unsigned long * __restrict p2) \
{ \
kernel_fpu_begin(); \
__xor_##flavor##_2(bytes, p1, p2); \
kernel_fpu_end(); \
} \
EXPORT_SYMBOL_GPL(xor_##flavor##_2)
#define MAKE_XOR_GLUE_3(flavor) \
void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1, \
const unsigned long * __restrict p2, \
const unsigned long * __restrict p3) \
{ \
kernel_fpu_begin(); \
__xor_##flavor##_3(bytes, p1, p2, p3); \
kernel_fpu_end(); \
} \
EXPORT_SYMBOL_GPL(xor_##flavor##_3)
#define MAKE_XOR_GLUE_4(flavor) \
void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1, \
const unsigned long * __restrict p2, \
const unsigned long * __restrict p3, \
const unsigned long * __restrict p4) \
{ \
kernel_fpu_begin(); \
__xor_##flavor##_4(bytes, p1, p2, p3, p4); \
kernel_fpu_end(); \
} \
EXPORT_SYMBOL_GPL(xor_##flavor##_4)
#define MAKE_XOR_GLUE_5(flavor) \
void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1, \
const unsigned long * __restrict p2, \
const unsigned long * __restrict p3, \
const unsigned long * __restrict p4, \
const unsigned long * __restrict p5) \
{ \
kernel_fpu_begin(); \
__xor_##flavor##_5(bytes, p1, p2, p3, p4, p5); \
kernel_fpu_end(); \
} \
EXPORT_SYMBOL_GPL(xor_##flavor##_5)
#define MAKE_XOR_GLUES(flavor) \
MAKE_XOR_GLUE_2(flavor); \
MAKE_XOR_GLUE_3(flavor); \
MAKE_XOR_GLUE_4(flavor); \
MAKE_XOR_GLUE_5(flavor)
#ifdef CONFIG_CPU_HAS_LSX
MAKE_XOR_GLUES(lsx);
#endif
#ifdef CONFIG_CPU_HAS_LASX
MAKE_XOR_GLUES(lasx);
#endif
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
*
* Template for XOR operations, instantiated in xor_simd.c.
*
* Expected preprocessor definitions:
*
* - LINE_WIDTH
* - XOR_FUNC_NAME(nr)
* - LD_INOUT_LINE(buf)
* - LD_AND_XOR_LINE(buf)
* - ST_LINE(buf)
*/
/*
 * v1 ^= v2, processed LINE_WIDTH bytes per iteration using the
 * instruction sequences supplied by the LD_INOUT_LINE / LD_AND_XOR_LINE /
 * ST_LINE macros (see the expected-definitions list in the file header).
 *
 * NOTE(review): the do/while loop runs at least once and terminates only
 * via --lines, so bytes is assumed to be a non-zero multiple of
 * LINE_WIDTH -- true for the kernel XOR block API, but confirm for any
 * new caller.
 */
void XOR_FUNC_NAME(2)(unsigned long bytes,
		      unsigned long * __restrict v1,
		      const unsigned long * __restrict v2)
{
	unsigned long lines = bytes / LINE_WIDTH;

	do {
		__asm__ __volatile__ (
			LD_INOUT_LINE(v1)
			LD_AND_XOR_LINE(v2)
			ST_LINE(v1)
			: : [v1] "r"(v1), [v2] "r"(v2) : "memory"
		);

		/* advance every buffer pointer by one processed line */
		v1 += LINE_WIDTH / sizeof(unsigned long);
		v2 += LINE_WIDTH / sizeof(unsigned long);
	} while (--lines > 0);
}
/*
 * v1 ^= v2 ^ v3, LINE_WIDTH bytes per iteration. Same macro contract and
 * bytes-is-a-multiple-of-LINE_WIDTH assumption as the two-buffer variant.
 */
void XOR_FUNC_NAME(3)(unsigned long bytes,
		      unsigned long * __restrict v1,
		      const unsigned long * __restrict v2,
		      const unsigned long * __restrict v3)
{
	unsigned long lines = bytes / LINE_WIDTH;

	do {
		__asm__ __volatile__ (
			LD_INOUT_LINE(v1)
			LD_AND_XOR_LINE(v2)
			LD_AND_XOR_LINE(v3)
			ST_LINE(v1)
			: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
		);

		v1 += LINE_WIDTH / sizeof(unsigned long);
		v2 += LINE_WIDTH / sizeof(unsigned long);
		v3 += LINE_WIDTH / sizeof(unsigned long);
	} while (--lines > 0);
}
/*
 * v1 ^= v2 ^ v3 ^ v4, LINE_WIDTH bytes per iteration. Same macro contract
 * and bytes-is-a-multiple-of-LINE_WIDTH assumption as the two-buffer variant.
 */
void XOR_FUNC_NAME(4)(unsigned long bytes,
		      unsigned long * __restrict v1,
		      const unsigned long * __restrict v2,
		      const unsigned long * __restrict v3,
		      const unsigned long * __restrict v4)
{
	unsigned long lines = bytes / LINE_WIDTH;

	do {
		__asm__ __volatile__ (
			LD_INOUT_LINE(v1)
			LD_AND_XOR_LINE(v2)
			LD_AND_XOR_LINE(v3)
			LD_AND_XOR_LINE(v4)
			ST_LINE(v1)
			: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
			: "memory"
		);

		v1 += LINE_WIDTH / sizeof(unsigned long);
		v2 += LINE_WIDTH / sizeof(unsigned long);
		v3 += LINE_WIDTH / sizeof(unsigned long);
		v4 += LINE_WIDTH / sizeof(unsigned long);
	} while (--lines > 0);
}
/*
 * v1 ^= v2 ^ v3 ^ v4 ^ v5, LINE_WIDTH bytes per iteration. Same macro
 * contract and bytes-is-a-multiple-of-LINE_WIDTH assumption as the
 * two-buffer variant.
 */
void XOR_FUNC_NAME(5)(unsigned long bytes,
		      unsigned long * __restrict v1,
		      const unsigned long * __restrict v2,
		      const unsigned long * __restrict v3,
		      const unsigned long * __restrict v4,
		      const unsigned long * __restrict v5)
{
	unsigned long lines = bytes / LINE_WIDTH;

	do {
		__asm__ __volatile__ (
			LD_INOUT_LINE(v1)
			LD_AND_XOR_LINE(v2)
			LD_AND_XOR_LINE(v3)
			LD_AND_XOR_LINE(v4)
			LD_AND_XOR_LINE(v5)
			ST_LINE(v1)
			: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
			  [v5] "r"(v5) : "memory"
		);

		v1 += LINE_WIDTH / sizeof(unsigned long);
		v2 += LINE_WIDTH / sizeof(unsigned long);
		v3 += LINE_WIDTH / sizeof(unsigned long);
		v4 += LINE_WIDTH / sizeof(unsigned long);
		v5 += LINE_WIDTH / sizeof(unsigned long);
	} while (--lines > 0);
}
......@@ -7,3 +7,6 @@ obj-y += init.o cache.o tlb.o tlbex.o extable.o \
fault.o ioremap.o maccess.o mmap.o pgtable.o page.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_KASAN) += kasan_init.o
KASAN_SANITIZE_kasan_init.o := n
......@@ -156,7 +156,6 @@ void cpu_cache_init(void)
current_cpu_data.cache_leaves_present = leaf;
current_cpu_data.options |= LOONGARCH_CPU_PREFETCH;
shm_align_mask = PAGE_SIZE - 1;
}
static const pgprot_t protection_map[16] = {
......
......@@ -23,6 +23,7 @@
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/kfence.h>
#include <asm/branch.h>
#include <asm/mmu_context.h>
......@@ -30,7 +31,8 @@
int show_unhandled_signals = 1;
static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
static void __kprobes no_context(struct pt_regs *regs,
unsigned long write, unsigned long address)
{
const int field = sizeof(unsigned long) * 2;
......@@ -38,6 +40,9 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
if (fixup_exception(regs))
return;
if (kfence_handle_page_fault(address, write, regs))
return;
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
......@@ -51,14 +56,15 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
die("Oops", regs);
}
static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address)
static void __kprobes do_out_of_memory(struct pt_regs *regs,
unsigned long write, unsigned long address)
{
/*
* We ran out of memory, call the OOM killer, and return the userspace
* (which will retry the fault, or kill us if we got oom-killed).
*/
if (!user_mode(regs)) {
no_context(regs, address);
no_context(regs, write, address);
return;
}
pagefault_out_of_memory();
......@@ -69,7 +75,7 @@ static void __kprobes do_sigbus(struct pt_regs *regs,
{
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs)) {
no_context(regs, address);
no_context(regs, write, address);
return;
}
......@@ -90,7 +96,7 @@ static void __kprobes do_sigsegv(struct pt_regs *regs,
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs)) {
no_context(regs, address);
no_context(regs, write, address);
return;
}
......@@ -149,7 +155,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
*/
if (address & __UA_LIMIT) {
if (!user_mode(regs))
no_context(regs, address);
no_context(regs, write, address);
else
do_sigsegv(regs, write, address, si_code);
return;
......@@ -211,7 +217,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
if (fault_signal_pending(fault, regs)) {
if (!user_mode(regs))
no_context(regs, address);
no_context(regs, write, address);
return;
}
......@@ -232,7 +238,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
if (unlikely(fault & VM_FAULT_ERROR)) {
mmap_read_unlock(mm);
if (fault & VM_FAULT_OOM) {
do_out_of_memory(regs, address);
do_out_of_memory(regs, write, address);
return;
} else if (fault & VM_FAULT_SIGSEGV) {
do_sigsegv(regs, write, address, si_code);
......
......@@ -35,33 +35,8 @@
#include <asm/pgalloc.h>
#include <asm/tlb.h>
/*
* We have up to 8 empty zeroed pages so we can map one of the right colour
* when needed. Since page is never written to after the initialization we
* don't have to care about aliases on other CPUs.
*/
unsigned long empty_zero_page, zero_page_mask;
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(zero_page_mask);
void setup_zero_pages(void)
{
unsigned int order, i;
struct page *page;
order = 0;
empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
if (!empty_zero_page)
panic("Oh boy, that early out of memory?");
page = virt_to_page((void *)empty_zero_page);
split_page(page, order);
for (i = 0; i < (1 << order); i++, page++)
mark_page_reserved(page);
zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
}
void copy_user_highpage(struct page *to, struct page *from,
unsigned long vaddr, struct vm_area_struct *vma)
......@@ -106,7 +81,6 @@ void __init mem_init(void)
high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
memblock_free_all();
setup_zero_pages(); /* Setup zeroed pages. */
}
#endif /* !CONFIG_NUMA */
......@@ -191,43 +165,42 @@ void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *al
#endif
#endif
static pte_t *fixmap_pte(unsigned long addr)
pte_t * __init populate_kernel_pte(unsigned long addr)
{
pgd_t *pgd;
p4d_t *p4d;
pgd_t *pgd = pgd_offset_k(addr);
p4d_t *p4d = p4d_offset(pgd, addr);
pud_t *pud;
pmd_t *pmd;
pgd = pgd_offset_k(addr);
p4d = p4d_offset(pgd, addr);
if (pgd_none(*pgd)) {
pud_t *new __maybe_unused;
new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
pgd_populate(&init_mm, pgd, new);
if (p4d_none(*p4d)) {
pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
if (!pud)
panic("%s: Failed to allocate memory\n", __func__);
p4d_populate(&init_mm, p4d, pud);
#ifndef __PAGETABLE_PUD_FOLDED
pud_init(new);
pud_init(pud);
#endif
}
pud = pud_offset(p4d, addr);
if (pud_none(*pud)) {
pmd_t *new __maybe_unused;
new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
pud_populate(&init_mm, pud, new);
pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
if (!pmd)
panic("%s: Failed to allocate memory\n", __func__);
pud_populate(&init_mm, pud, pmd);
#ifndef __PAGETABLE_PMD_FOLDED
pmd_init(new);
pmd_init(pmd);
#endif
}
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd)) {
pte_t *new __maybe_unused;
if (!pmd_present(*pmd)) {
pte_t *pte;
new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
pmd_populate_kernel(&init_mm, pmd, new);
pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
if (!pte)
panic("%s: Failed to allocate memory\n", __func__);
pmd_populate_kernel(&init_mm, pmd, pte);
}
return pte_offset_kernel(pmd, addr);
......@@ -241,7 +214,7 @@ void __init __set_fixmap(enum fixed_addresses idx,
BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
ptep = fixmap_pte(addr);
ptep = populate_kernel_pte(addr);
if (!pte_none(*ptep)) {
pte_ERROR(*ptep);
return;
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2023 Loongson Technology Corporation Limited
*/
#define pr_fmt(fmt) "kasan: " fmt
#include <linux/kasan.h>
#include <linux/memblock.h>
#include <linux/sched/task.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm-generic/sections.h>
static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
#ifdef __PAGETABLE_PUD_FOLDED
#define __p4d_none(early, p4d) (0)
#else
#define __p4d_none(early, p4d) (early ? (p4d_val(p4d) == 0) : \
(__pa(p4d_val(p4d)) == (unsigned long)__pa(kasan_early_shadow_pud)))
#endif
#ifdef __PAGETABLE_PMD_FOLDED
#define __pud_none(early, pud) (0)
#else
#define __pud_none(early, pud) (early ? (pud_val(pud) == 0) : \
(__pa(pud_val(pud)) == (unsigned long)__pa(kasan_early_shadow_pmd)))
#endif
#define __pmd_none(early, pmd) (early ? (pmd_val(pmd) == 0) : \
(__pa(pmd_val(pmd)) == (unsigned long)__pa(kasan_early_shadow_pte)))
#define __pte_none(early, pte) (early ? pte_none(pte) : \
((pte_val(pte) & _PFN_MASK) == (unsigned long)__pa(kasan_early_shadow_page)))
bool kasan_early_stage = true;
/*
* Alloc memory for shadow memory page table.
*/
/*
 * Allocate one zeroed page for a shadow page table from memblock,
 * preferring @node. Panics on failure -- this runs at boot, before
 * any meaningful recovery is possible. Returns the page's physical
 * address.
 */
static phys_addr_t __init kasan_alloc_zeroed_page(int node)
{
	void *page = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
					    __pa(MAX_DMA_ADDRESS),
					    MEMBLOCK_ALLOC_ACCESSIBLE, node);

	if (!page)
		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%llx\n",
		      __func__, PAGE_SIZE, PAGE_SIZE, node, __pa(MAX_DMA_ADDRESS));

	return __pa(page);
}
/*
 * Return the pte for @addr under @pmdp, populating the pmd first if it is
 * still empty (per __pmd_none: zero when @early, or still pointing at the
 * shared kasan_early_shadow_pte otherwise).
 *
 * @early: boot-time call -- reuse the statically allocated early shadow
 * pte table instead of allocating. Otherwise allocate a fresh page and
 * seed it with a copy of the early shadow entries before installing it.
 */
static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, bool early)
{
	if (__pmd_none(early, READ_ONCE(*pmdp))) {
		phys_addr_t pte_phys = early ?
				__pa_symbol(kasan_early_shadow_pte) : kasan_alloc_zeroed_page(node);
		if (!early)
			memcpy(__va(pte_phys), kasan_early_shadow_pte, sizeof(kasan_early_shadow_pte));
		pmd_populate_kernel(NULL, pmdp, (pte_t *)__va(pte_phys));
	}

	return pte_offset_kernel(pmdp, addr);
}
/*
 * Return the pmd for @addr under @pudp, populating the pud first if it is
 * still empty. Same early/late split as kasan_pte_offset(): share the
 * static kasan_early_shadow_pmd at boot, otherwise allocate and copy.
 */
static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, bool early)
{
	if (__pud_none(early, READ_ONCE(*pudp))) {
		phys_addr_t pmd_phys = early ?
				__pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node);
		if (!early)
			memcpy(__va(pmd_phys), kasan_early_shadow_pmd, sizeof(kasan_early_shadow_pmd));
		pud_populate(&init_mm, pudp, (pmd_t *)__va(pmd_phys));
	}

	return pmd_offset(pudp, addr);
}
/*
 * Return the pud for @addr under @p4dp, populating the p4d first if it is
 * still empty. Same early/late split as kasan_pte_offset(): share the
 * static kasan_early_shadow_pud at boot, otherwise allocate and copy.
 */
static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, bool early)
{
	if (__p4d_none(early, READ_ONCE(*p4dp))) {
		phys_addr_t pud_phys = early ?
				__pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node);
		if (!early)
			memcpy(__va(pud_phys), kasan_early_shadow_pud, sizeof(kasan_early_shadow_pud));
		p4d_populate(&init_mm, p4dp, (pud_t *)__va(pud_phys));
	}

	return pud_offset(p4dp, addr);
}
/*
 * Map shadow memory for [addr, end) at pte granularity. When @early, every
 * pte points at the shared zero page (kasan_early_shadow_page); otherwise
 * each pte gets its own freshly allocated zeroed page.
 *
 * Note the loop condition: besides the addr != end bound it also stops as
 * soon as the *next* pte is no longer an "early shadow" entry, so ranges
 * that were already given private pages are not overwritten.
 */
static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
				      unsigned long end, int node, bool early)
{
	unsigned long next;
	pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early);

	do {
		phys_addr_t page_phys = early ?
					__pa_symbol(kasan_early_shadow_page)
					      : kasan_alloc_zeroed_page(node);
		next = addr + PAGE_SIZE;
		set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
	} while (ptep++, addr = next, addr != end && __pte_none(early, READ_ONCE(*ptep)));
}
/*
 * Walk [addr, end) one pmd at a time, descending into kasan_pte_populate()
 * for each. Stops early if the next pmd already has a private table
 * (mirrors the stop condition in kasan_pte_populate()).
 */
static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
				      unsigned long end, int node, bool early)
{
	unsigned long next;
	pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);

	do {
		next = pmd_addr_end(addr, end);
		kasan_pte_populate(pmdp, addr, next, node, early);
	} while (pmdp++, addr = next, addr != end && __pmd_none(early, READ_ONCE(*pmdp)));
}
/* Walk [addr, end) one pud at a time, descending into kasan_pmd_populate(). */
static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr,
				      unsigned long end, int node, bool early)
{
	unsigned long next;
	pud_t *pudp = kasan_pud_offset(p4dp, addr, node, early);

	do {
		next = pud_addr_end(addr, end);
		kasan_pmd_populate(pudp, addr, next, node, early);
	} while (pudp++, addr = next, addr != end);
}
/* Walk [addr, end) one p4d at a time, descending into kasan_pud_populate(). */
static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr,
				      unsigned long end, int node, bool early)
{
	unsigned long next;
	p4d_t *p4dp = p4d_offset(pgdp, addr);

	do {
		next = p4d_addr_end(addr, end);
		kasan_pud_populate(p4dp, addr, next, node, early);
	} while (p4dp++, addr = next, addr != end);
}
/*
 * Top of the shadow-populate walk: iterate the kernel pgd over [addr, end)
 * and descend through p4d/pud/pmd/pte. @early selects shared early-shadow
 * tables (boot) vs. privately allocated ones (see kasan_pte_offset()).
 */
static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
				      int node, bool early)
{
	unsigned long next;
	pgd_t *pgdp;

	pgdp = pgd_offset_k(addr);

	do {
		next = pgd_addr_end(addr, end);
		kasan_p4d_populate(pgdp, addr, next, node, early);
	} while (pgdp++, addr = next, addr != end);
}
/* Set up full kasan mappings, ensuring that the mapped pages are zeroed */
static void __init kasan_map_populate(unsigned long start, unsigned long end,
				      int node)
{
	/* early=false: allocate private zeroed pages, don't share the early shadow */
	kasan_pgd_populate(start & PAGE_MASK, PAGE_ALIGN(end), node, false);
}
/*
 * Early KASAN entry point. No mappings are installed here (that happens
 * in kasan_init()); only compile-time sanity checks that the shadow
 * region is PGDIR-aligned, which the populate walks rely on.
 */
asmlinkage void __init kasan_early_init(void)
{
	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
}
/* Store a pgd entry with WRITE_ONCE so the write cannot be torn or elided. */
static inline void kasan_set_pgd(pgd_t *pgdp, pgd_t pgdval)
{
	WRITE_ONCE(*pgdp, pgdval);
}
/*
 * Remove the kasan shadow page table references from swapper_pg_dir over
 * [start, end), one PGD entry per step.
 *
 * pgd_clear() cannot be used here: it is a no-op on 2- and 3-level page
 * table configurations, so the entries are zeroed directly instead.
 */
static void __init clear_pgds(unsigned long start, unsigned long end)
{
	unsigned long addr = start;

	while (addr < end) {
		kasan_set_pgd((pgd_t *)pgd_offset_k(addr), __pgd(0));
		addr += PGDIR_SIZE;
	}
}
/*
 * Main KASAN bring-up: temporarily switch to a private pgd, wipe the
 * shadow region, map it all to a shared zero page, then give the linear
 * map and the modules area real (private, zeroed) shadow pages, and
 * finally re-install swapper_pg_dir and enable error reporting.
 */
void __init kasan_init(void)
{
	u64 i;
	phys_addr_t pa_start, pa_end;

	/*
	 * The PGD entries were populated as invalid_pmd_table or
	 * invalid_pud_table in pagetable_init() (which one depends on how
	 * many page table levels are in use), so the pgd values covering
	 * the kasan shadow region are non-zero and must be cleared first:
	 * otherwise pgd_none() would be false and the populate calls below
	 * would never install any new pgd entries at all.
	 *
	 * Clearing is done while running on a private copy (kasan_pg_dir)
	 * so the live swapper_pg_dir is never seen half-cleared.
	 */
	memcpy(kasan_pg_dir, swapper_pg_dir, sizeof(kasan_pg_dir));
	csr_write64(__pa_symbol(kasan_pg_dir), LOONGARCH_CSR_PGDH);
	local_flush_tlb_all();

	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);

	/* Maps everything to a single page of zeroes */
	kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, true);

	kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START),
					kasan_mem_to_shadow((void *)KFENCE_AREA_END));

	/* From here on, the populate helpers allocate private shadow pages. */
	kasan_early_stage = false;

	/* Populate the linear mapping */
	for_each_mem_range(i, &pa_start, &pa_end) {
		void *start = (void *)phys_to_virt(pa_start);
		void *end   = (void *)phys_to_virt(pa_end);

		if (start >= end)
			break;

		kasan_map_populate((unsigned long)kasan_mem_to_shadow(start),
			(unsigned long)kasan_mem_to_shadow(end), NUMA_NO_NODE);
	}

	/* Populate modules mapping */
	kasan_map_populate((unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR),
		(unsigned long)kasan_mem_to_shadow((void *)MODULES_END), NUMA_NO_NODE);

	/*
	 * KAsan may reuse the contents of kasan_early_shadow_pte directly, so we
	 * should make sure that it maps the zero page read-only.
	 */
	for (i = 0; i < PTRS_PER_PTE; i++)
		set_pte(&kasan_early_shadow_pte[i],
			pfn_pte(__phys_to_pfn(__pa_symbol(kasan_early_shadow_page)), PAGE_KERNEL_RO));

	memset(kasan_early_shadow_page, 0, PAGE_SIZE);
	csr_write64(__pa_symbol(swapper_pg_dir), LOONGARCH_CSR_PGDH);
	local_flush_tlb_all();

	/* At this point kasan is fully initialized. Enable error messages */
	init_task.kasan_depth = 0;
	pr_info("KernelAddressSanitizer initialized.\n");
}
......@@ -8,12 +8,11 @@
#include <linux/mm.h>
#include <linux/mman.h>
unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */
EXPORT_SYMBOL(shm_align_mask);
#define SHM_ALIGN_MASK (SHMLBA - 1)
#define COLOUR_ALIGN(addr, pgoff) \
((((addr) + shm_align_mask) & ~shm_align_mask) + \
(((pgoff) << PAGE_SHIFT) & shm_align_mask))
#define COLOUR_ALIGN(addr, pgoff) \
((((addr) + SHM_ALIGN_MASK) & ~SHM_ALIGN_MASK) \
+ (((pgoff) << PAGE_SHIFT) & SHM_ALIGN_MASK))
enum mmap_allocation_direction {UP, DOWN};
......@@ -40,7 +39,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
* cache aliasing constraints.
*/
if ((flags & MAP_SHARED) &&
((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
((addr - (pgoff << PAGE_SHIFT)) & SHM_ALIGN_MASK))
return -EINVAL;
return addr;
}
......@@ -63,7 +62,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
}
info.length = len;
info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0;
info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0;
info.align_offset = pgoff << PAGE_SHIFT;
if (dir == DOWN) {
......
......@@ -9,6 +9,18 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
/*
 * Convert a kernel virtual address in a DMW (direct-mapped window) to its
 * struct page. DMW addresses translate to pfns arithmetically, so no page
 * table walk is needed.
 */
struct page *dmw_virt_to_page(unsigned long kaddr)
{
	return pfn_to_page(virt_to_pfn(kaddr));
}
EXPORT_SYMBOL_GPL(dmw_virt_to_page);
/*
 * Convert a page-table-mapped kernel virtual address to its struct page by
 * walking the kernel page tables down to the pte. Assumes @kaddr is
 * currently mapped (virt_to_kpte() result is dereferenced unconditionally).
 */
struct page *tlb_virt_to_page(unsigned long kaddr)
{
	return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
}
EXPORT_SYMBOL_GPL(tlb_virt_to_page);
pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *init, *ret = NULL;
......
# SPDX-License-Identifier: GPL-2.0
# Objects to go into the VDSO.
KASAN_SANITIZE := n
KCOV_INSTRUMENT := n
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
......
......@@ -54,11 +54,13 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D];
int kasan_populate_early_shadow(const void *shadow_start,
const void *shadow_end);
#ifndef __HAVE_ARCH_SHADOW_MAP
static inline void *kasan_mem_to_shadow(const void *addr)
{
return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
+ KASAN_SHADOW_OFFSET;
}
#endif
int kasan_add_zero_shadow(void *start, unsigned long size);
void kasan_remove_zero_shadow(void *start, unsigned long size);
......
......@@ -108,6 +108,8 @@ extern const struct raid6_calls raid6_vpermxor1;
extern const struct raid6_calls raid6_vpermxor2;
extern const struct raid6_calls raid6_vpermxor4;
extern const struct raid6_calls raid6_vpermxor8;
extern const struct raid6_calls raid6_lsx;
extern const struct raid6_calls raid6_lasx;
struct raid6_recov_calls {
void (*data2)(int, size_t, int, int, void **);
......@@ -123,6 +125,8 @@ extern const struct raid6_recov_calls raid6_recov_avx2;
extern const struct raid6_recov_calls raid6_recov_avx512;
extern const struct raid6_recov_calls raid6_recov_s390xc;
extern const struct raid6_recov_calls raid6_recov_neon;
extern const struct raid6_recov_calls raid6_recov_lsx;
extern const struct raid6_recov_calls raid6_recov_lasx;
extern const struct raid6_calls raid6_neonx1;
extern const struct raid6_calls raid6_neonx2;
......
......@@ -9,6 +9,7 @@ raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o recov_loongarch_simd.o
hostprogs += mktables
......
......@@ -73,6 +73,14 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_neonx2,
&raid6_neonx1,
#endif
#ifdef CONFIG_LOONGARCH
#ifdef CONFIG_CPU_HAS_LASX
&raid6_lasx,
#endif
#ifdef CONFIG_CPU_HAS_LSX
&raid6_lsx,
#endif
#endif
#if defined(__ia64__)
&raid6_intx32,
&raid6_intx16,
......@@ -103,6 +111,14 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
#endif
#if defined(CONFIG_KERNEL_MODE_NEON)
&raid6_recov_neon,
#endif
#ifdef CONFIG_LOONGARCH
#ifdef CONFIG_CPU_HAS_LASX
&raid6_recov_lasx,
#endif
#ifdef CONFIG_CPU_HAS_LSX
&raid6_recov_lsx,
#endif
#endif
&raid6_recov_intx1,
NULL
......
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
*
* raid6/loongarch.h
*
* Definitions common to LoongArch RAID-6 code only
*/
#ifndef _LIB_RAID6_LOONGARCH_H
#define _LIB_RAID6_LOONGARCH_H

#ifdef __KERNEL__
#include <asm/cpu-features.h>
#include <asm/fpu.h>
#else /* for user-space testing */
#include <sys/auxv.h>

/* have to supply these defines for glibc 2.37- and musl */
#ifndef HWCAP_LOONGARCH_LSX
#define HWCAP_LOONGARCH_LSX	(1 << 4)
#endif
#ifndef HWCAP_LOONGARCH_LASX
#define HWCAP_LOONGARCH_LASX	(1 << 5)
#endif

/* no FPU context to manage in user space: these become no-ops */
#define kernel_fpu_begin()
#define kernel_fpu_end()

/* feature detection via the HWCAP auxiliary vector instead of CPUCFG */
#define cpu_has_lsx	(getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LSX)
#define cpu_has_lasx	(getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LASX)
#endif /* __KERNEL__ */

#endif /* _LIB_RAID6_LOONGARCH_H */
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* RAID6 syndrome calculations in LoongArch SIMD (LSX & LASX)
*
* Copyright 2023 WANG Xuerui <git@xen0n.name>
*
* Based on the generic RAID-6 code (int.uc):
*
* Copyright 2002-2004 H. Peter Anvin
*/
#include <linux/raid/pq.h>
#include "loongarch.h"
/*
* The vector algorithms are currently priority 0, which means the generic
* scalar algorithms are not being disabled if vector support is present.
* This is like the similar LoongArch RAID5 XOR code, with the main reason
* repeated here: it cannot be ruled out at this point of time, that some
* future (maybe reduced) models could run the vector algorithms slower than
* the scalar ones, maybe for errata or micro-op reasons. It may be
* appropriate to revisit this after one or two more uarch generations.
*/
#ifdef CONFIG_CPU_HAS_LSX
#define NSIZE 16
/* Runtime check used by the raid6 algo selector: is 128-bit LSX available? */
static int raid6_has_lsx(void)
{
	return cpu_has_lsx;
}
/*
 * Compute the RAID-6 P (XOR parity) and Q (Reed-Solomon syndrome) blocks
 * over disks-2 data disks, @bytes per disk, with LSX: four 16-byte vector
 * lanes (64 bytes) per loop iteration. Mirrors the generic int.uc
 * algorithm: Q is accumulated via GF(2^8) multiply-by-2, implemented as
 * shift-left-by-1 plus a conditional XOR with the 0x1d reduction polynomial
 * (the vslti.b mask selects bytes whose top bit was set).
 */
static void raid6_lsx_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	/*
	 * $vr0, $vr1, $vr2, $vr3: wp
	 * $vr4, $vr5, $vr6, $vr7: wq
	 * $vr8, $vr9, $vr10, $vr11: wd
	 * $vr12, $vr13, $vr14, $vr15: w2
	 * $vr16, $vr17, $vr18, $vr19: w1
	 */
	for (d = 0; d < bytes; d += NSIZE*4) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
		asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
		asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE]));
		asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE]));
		asm volatile("vori.b $vr4, $vr0, 0");
		asm volatile("vori.b $vr5, $vr1, 0");
		asm volatile("vori.b $vr6, $vr2, 0");
		asm volatile("vori.b $vr7, $vr3, 0");
		for (z = z0-1; z >= 0; z--) {
			/* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
			asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE]));
			asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE]));
			asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE]));
			asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE]));
			/* wp$$ ^= wd$$; */
			asm volatile("vxor.v $vr0, $vr0, $vr8");
			asm volatile("vxor.v $vr1, $vr1, $vr9");
			asm volatile("vxor.v $vr2, $vr2, $vr10");
			asm volatile("vxor.v $vr3, $vr3, $vr11");
			/* w2$$ = MASK(wq$$); */
			asm volatile("vslti.b $vr12, $vr4, 0");
			asm volatile("vslti.b $vr13, $vr5, 0");
			asm volatile("vslti.b $vr14, $vr6, 0");
			asm volatile("vslti.b $vr15, $vr7, 0");
			/* w1$$ = SHLBYTE(wq$$); */
			asm volatile("vslli.b $vr16, $vr4, 1");
			asm volatile("vslli.b $vr17, $vr5, 1");
			asm volatile("vslli.b $vr18, $vr6, 1");
			asm volatile("vslli.b $vr19, $vr7, 1");
			/* w2$$ &= NBYTES(0x1d); */
			asm volatile("vandi.b $vr12, $vr12, 0x1d");
			asm volatile("vandi.b $vr13, $vr13, 0x1d");
			asm volatile("vandi.b $vr14, $vr14, 0x1d");
			asm volatile("vandi.b $vr15, $vr15, 0x1d");
			/* w1$$ ^= w2$$; */
			asm volatile("vxor.v $vr16, $vr16, $vr12");
			asm volatile("vxor.v $vr17, $vr17, $vr13");
			asm volatile("vxor.v $vr18, $vr18, $vr14");
			asm volatile("vxor.v $vr19, $vr19, $vr15");
			/* wq$$ = w1$$ ^ wd$$; */
			asm volatile("vxor.v $vr4, $vr16, $vr8");
			asm volatile("vxor.v $vr5, $vr17, $vr9");
			asm volatile("vxor.v $vr6, $vr18, $vr10");
			asm volatile("vxor.v $vr7, $vr19, $vr11");
		}

		/* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
		asm volatile("vst $vr0, %0" : "=m"(p[d+NSIZE*0]));
		asm volatile("vst $vr1, %0" : "=m"(p[d+NSIZE*1]));
		asm volatile("vst $vr2, %0" : "=m"(p[d+NSIZE*2]));
		asm volatile("vst $vr3, %0" : "=m"(p[d+NSIZE*3]));
		/* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
		asm volatile("vst $vr4, %0" : "=m"(q[d+NSIZE*0]));
		asm volatile("vst $vr5, %0" : "=m"(q[d+NSIZE*1]));
		asm volatile("vst $vr6, %0" : "=m"(q[d+NSIZE*2]));
		asm volatile("vst $vr7, %0" : "=m"(q[d+NSIZE*3]));
	}

	kernel_fpu_end();
}
/*
 * Incremental (XOR) syndrome update with LSX: recompute the contribution
 * of data disks [start, stop] and XOR it into the existing P and Q blocks.
 *
 * Right of @stop the contribution is zero, so the accumulators begin at
 * disk @stop. Left of @start the data is unchanged, so only the GF(2^8)
 * multiply-by-2 of Q must still be applied (the "left side optimization"
 * loop, which touches no data). Same register allocation and
 * shift/mask/0x1d reduction scheme as raid6_lsx_gen_syndrome().
 */
static void raid6_lsx_xor_syndrome(int disks, int start, int stop,
				   size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	/*
	 * $vr0, $vr1, $vr2, $vr3: wp
	 * $vr4, $vr5, $vr6, $vr7: wq
	 * $vr8, $vr9, $vr10, $vr11: wd
	 * $vr12, $vr13, $vr14, $vr15: w2
	 * $vr16, $vr17, $vr18, $vr19: w1
	 */
	for (d = 0; d < bytes; d += NSIZE*4) {
		/* P/Q data pages */
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
		asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
		asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE]));
		asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE]));
		asm volatile("vori.b $vr4, $vr0, 0");
		asm volatile("vori.b $vr5, $vr1, 0");
		asm volatile("vori.b $vr6, $vr2, 0");
		asm volatile("vori.b $vr7, $vr3, 0");
		for (z = z0-1; z >= start; z--) {
			/* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
			asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE]));
			asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE]));
			asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE]));
			asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE]));
			/* wp$$ ^= wd$$; */
			asm volatile("vxor.v $vr0, $vr0, $vr8");
			asm volatile("vxor.v $vr1, $vr1, $vr9");
			asm volatile("vxor.v $vr2, $vr2, $vr10");
			asm volatile("vxor.v $vr3, $vr3, $vr11");
			/* w2$$ = MASK(wq$$); */
			asm volatile("vslti.b $vr12, $vr4, 0");
			asm volatile("vslti.b $vr13, $vr5, 0");
			asm volatile("vslti.b $vr14, $vr6, 0");
			asm volatile("vslti.b $vr15, $vr7, 0");
			/* w1$$ = SHLBYTE(wq$$); */
			asm volatile("vslli.b $vr16, $vr4, 1");
			asm volatile("vslli.b $vr17, $vr5, 1");
			asm volatile("vslli.b $vr18, $vr6, 1");
			asm volatile("vslli.b $vr19, $vr7, 1");
			/* w2$$ &= NBYTES(0x1d); */
			asm volatile("vandi.b $vr12, $vr12, 0x1d");
			asm volatile("vandi.b $vr13, $vr13, 0x1d");
			asm volatile("vandi.b $vr14, $vr14, 0x1d");
			asm volatile("vandi.b $vr15, $vr15, 0x1d");
			/* w1$$ ^= w2$$; */
			asm volatile("vxor.v $vr16, $vr16, $vr12");
			asm volatile("vxor.v $vr17, $vr17, $vr13");
			asm volatile("vxor.v $vr18, $vr18, $vr14");
			asm volatile("vxor.v $vr19, $vr19, $vr15");
			/* wq$$ = w1$$ ^ wd$$; */
			asm volatile("vxor.v $vr4, $vr16, $vr8");
			asm volatile("vxor.v $vr5, $vr17, $vr9");
			asm volatile("vxor.v $vr6, $vr18, $vr10");
			asm volatile("vxor.v $vr7, $vr19, $vr11");
		}

		/* P/Q left side optimization */
		for (z = start-1; z >= 0; z--) {
			/* w2$$ = MASK(wq$$); */
			asm volatile("vslti.b $vr12, $vr4, 0");
			asm volatile("vslti.b $vr13, $vr5, 0");
			asm volatile("vslti.b $vr14, $vr6, 0");
			asm volatile("vslti.b $vr15, $vr7, 0");
			/* w1$$ = SHLBYTE(wq$$); */
			asm volatile("vslli.b $vr16, $vr4, 1");
			asm volatile("vslli.b $vr17, $vr5, 1");
			asm volatile("vslli.b $vr18, $vr6, 1");
			asm volatile("vslli.b $vr19, $vr7, 1");
			/* w2$$ &= NBYTES(0x1d); */
			asm volatile("vandi.b $vr12, $vr12, 0x1d");
			asm volatile("vandi.b $vr13, $vr13, 0x1d");
			asm volatile("vandi.b $vr14, $vr14, 0x1d");
			asm volatile("vandi.b $vr15, $vr15, 0x1d");
			/* wq$$ = w1$$ ^ w2$$; */
			asm volatile("vxor.v $vr4, $vr16, $vr12");
			asm volatile("vxor.v $vr5, $vr17, $vr13");
			asm volatile("vxor.v $vr6, $vr18, $vr14");
			asm volatile("vxor.v $vr7, $vr19, $vr15");
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 */
		asm volatile(
			"vld $vr20, %0\n\t"
			"vld $vr21, %1\n\t"
			"vld $vr22, %2\n\t"
			"vld $vr23, %3\n\t"
			"vld $vr24, %4\n\t"
			"vld $vr25, %5\n\t"
			"vld $vr26, %6\n\t"
			"vld $vr27, %7\n\t"
			"vxor.v $vr20, $vr20, $vr0\n\t"
			"vxor.v $vr21, $vr21, $vr1\n\t"
			"vxor.v $vr22, $vr22, $vr2\n\t"
			"vxor.v $vr23, $vr23, $vr3\n\t"
			"vxor.v $vr24, $vr24, $vr4\n\t"
			"vxor.v $vr25, $vr25, $vr5\n\t"
			"vxor.v $vr26, $vr26, $vr6\n\t"
			"vxor.v $vr27, $vr27, $vr7\n\t"
			"vst $vr20, %0\n\t"
			"vst $vr21, %1\n\t"
			"vst $vr22, %2\n\t"
			"vst $vr23, %3\n\t"
			"vst $vr24, %4\n\t"
			"vst $vr25, %5\n\t"
			"vst $vr26, %6\n\t"
			"vst $vr27, %7\n\t"
			: "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]),
			  "+m"(p[d+NSIZE*2]), "+m"(p[d+NSIZE*3]),
			  "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]),
			  "+m"(q[d+NSIZE*2]), "+m"(q[d+NSIZE*3])
		);
	}

	kernel_fpu_end();
}
/* LSX implementation descriptor, registered in lib/raid6/algos.c. */
const struct raid6_calls raid6_lsx = {
	raid6_lsx_gen_syndrome,
	raid6_lsx_xor_syndrome,
	raid6_has_lsx,
	"lsx",
	.priority = 0 /* see the comment near the top of the file for reason */
};
#undef NSIZE
#endif /* CONFIG_CPU_HAS_LSX */
#ifdef CONFIG_CPU_HAS_LASX
#define NSIZE 32
/* Runtime check used by the raid6 algo selector: is 256-bit LASX available? */
static int raid6_has_lasx(void)
{
	return cpu_has_lasx;
}
/*
 * LASX variant of the syndrome computation: identical algorithm to
 * raid6_lsx_gen_syndrome(), but with two 32-byte vector lanes (64 bytes)
 * per loop iteration instead of four 16-byte ones.
 */
static void raid6_lasx_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	/*
	 * $xr0, $xr1: wp
	 * $xr2, $xr3: wq
	 * $xr4, $xr5: wd
	 * $xr6, $xr7: w2
	 * $xr8, $xr9: w1
	 */
	for (d = 0; d < bytes; d += NSIZE*2) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
		asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
		asm volatile("xvori.b $xr2, $xr0, 0");
		asm volatile("xvori.b $xr3, $xr1, 0");
		for (z = z0-1; z >= 0; z--) {
			/* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
			asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE]));
			asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE]));
			/* wp$$ ^= wd$$; */
			asm volatile("xvxor.v $xr0, $xr0, $xr4");
			asm volatile("xvxor.v $xr1, $xr1, $xr5");
			/* w2$$ = MASK(wq$$); */
			asm volatile("xvslti.b $xr6, $xr2, 0");
			asm volatile("xvslti.b $xr7, $xr3, 0");
			/* w1$$ = SHLBYTE(wq$$); */
			asm volatile("xvslli.b $xr8, $xr2, 1");
			asm volatile("xvslli.b $xr9, $xr3, 1");
			/* w2$$ &= NBYTES(0x1d); */
			asm volatile("xvandi.b $xr6, $xr6, 0x1d");
			asm volatile("xvandi.b $xr7, $xr7, 0x1d");
			/* w1$$ ^= w2$$; */
			asm volatile("xvxor.v $xr8, $xr8, $xr6");
			asm volatile("xvxor.v $xr9, $xr9, $xr7");
			/* wq$$ = w1$$ ^ wd$$; */
			asm volatile("xvxor.v $xr2, $xr8, $xr4");
			asm volatile("xvxor.v $xr3, $xr9, $xr5");
		}

		/* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
		asm volatile("xvst $xr0, %0" : "=m"(p[d+NSIZE*0]));
		asm volatile("xvst $xr1, %0" : "=m"(p[d+NSIZE*1]));
		/* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
		asm volatile("xvst $xr2, %0" : "=m"(q[d+NSIZE*0]));
		asm volatile("xvst $xr3, %0" : "=m"(q[d+NSIZE*1]));
	}

	kernel_fpu_end();
}
/*
 * XOR-update existing P/Q for the data disks in [start, stop] only.
 * Disks above 'stop' contribute nothing new ("right side"); for disks
 * below 'start' the running Q merely needs the per-disk GF(256)
 * multiply-by-x step with no data loads ("left side").  The final P/Q
 * deltas are XORed into the on-disk P and Q pages.
 *
 * NOTE(review): asm statements depend on fixed $xr register contents
 * from earlier statements — do not reorder.
 */
static void raid6_lasx_xor_syndrome(int disks, int start, int stop,
				    size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	/*
	 * Register allocation:
	 * $xr0, $xr1: wp
	 * $xr2, $xr3: wq
	 * $xr4, $xr5: wd
	 * $xr6, $xr7: w2
	 * $xr8, $xr9: w1
	 */
	for (d = 0; d < bytes; d += NSIZE*2) {
		/* P/Q data pages */
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
		asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
		asm volatile("xvori.b $xr2, $xr0, 0");
		asm volatile("xvori.b $xr3, $xr1, 0");
		for (z = z0-1; z >= start; z--) {
			/* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
			asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE]));
			asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE]));
			/* wp$$ ^= wd$$; */
			asm volatile("xvxor.v $xr0, $xr0, $xr4");
			asm volatile("xvxor.v $xr1, $xr1, $xr5");
			/* w2$$ = MASK(wq$$); */
			asm volatile("xvslti.b $xr6, $xr2, 0");
			asm volatile("xvslti.b $xr7, $xr3, 0");
			/* w1$$ = SHLBYTE(wq$$); */
			asm volatile("xvslli.b $xr8, $xr2, 1");
			asm volatile("xvslli.b $xr9, $xr3, 1");
			/* w2$$ &= NBYTES(0x1d); */
			asm volatile("xvandi.b $xr6, $xr6, 0x1d");
			asm volatile("xvandi.b $xr7, $xr7, 0x1d");
			/* w1$$ ^= w2$$; */
			asm volatile("xvxor.v $xr8, $xr8, $xr6");
			asm volatile("xvxor.v $xr9, $xr9, $xr7");
			/* wq$$ = w1$$ ^ wd$$; */
			asm volatile("xvxor.v $xr2, $xr8, $xr4");
			asm volatile("xvxor.v $xr3, $xr9, $xr5");
		}
		/* P/Q left side optimization: only advance Q (multiply by x) */
		for (z = start-1; z >= 0; z--) {
			/* w2$$ = MASK(wq$$); */
			asm volatile("xvslti.b $xr6, $xr2, 0");
			asm volatile("xvslti.b $xr7, $xr3, 0");
			/* w1$$ = SHLBYTE(wq$$); */
			asm volatile("xvslli.b $xr8, $xr2, 1");
			asm volatile("xvslli.b $xr9, $xr3, 1");
			/* w2$$ &= NBYTES(0x1d); */
			asm volatile("xvandi.b $xr6, $xr6, 0x1d");
			asm volatile("xvandi.b $xr7, $xr7, 0x1d");
			/* wq$$ = w1$$ ^ w2$$; */
			asm volatile("xvxor.v $xr2, $xr8, $xr6");
			asm volatile("xvxor.v $xr3, $xr9, $xr7");
		}
		/*
		 * Fold the computed deltas into the stored parity pages:
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 */
		asm volatile(
			"xvld $xr10, %0\n\t"
			"xvld $xr11, %1\n\t"
			"xvld $xr12, %2\n\t"
			"xvld $xr13, %3\n\t"
			"xvxor.v $xr10, $xr10, $xr0\n\t"
			"xvxor.v $xr11, $xr11, $xr1\n\t"
			"xvxor.v $xr12, $xr12, $xr2\n\t"
			"xvxor.v $xr13, $xr13, $xr3\n\t"
			"xvst $xr10, %0\n\t"
			"xvst $xr11, %1\n\t"
			"xvst $xr12, %2\n\t"
			"xvst $xr13, %3\n\t"
			: "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]),
			  "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1])
		);
	}

	kernel_fpu_end();
}
const struct raid6_calls raid6_lasx = {
raid6_lasx_gen_syndrome,
raid6_lasx_xor_syndrome,
raid6_has_lasx,
"lasx",
.priority = 0 /* see the comment near the top of the file for reason */
};
#undef NSIZE
#endif /* CONFIG_CPU_HAS_LASX */
// SPDX-License-Identifier: GPL-2.0-only
/*
* RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX)
*
* Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
*
* Originally based on recov_avx2.c and recov_ssse3.c:
*
* Copyright (C) 2012 Intel Corporation
* Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
*/
#include <linux/raid/pq.h>
#include "loongarch.h"
/*
* Unlike with the syndrome calculation algorithms, there's no boot-time
* selection of recovery algorithms by benchmarking, so we have to specify
* the priorities and hope the future cores will all have decent vector
* support (i.e. no LASX slower than LSX, or even scalar code).
*/
#ifdef CONFIG_CPU_HAS_LSX
/*
 * Validity hook for the raid6 recovery core: non-zero iff the CPU
 * supports the 128-bit LSX vector extension (cpu_has_lsx is arch-provided).
 */
static int raid6_has_lsx(void)
{
	return cpu_has_lsx;
}
/*
 * Recover two lost data blocks (faila < failb) from P and Q with LSX
 * vectors, 64 bytes per loop iteration.  Following the standard RAID6
 * two-data recovery:
 *   Dx = A*(P + Pxy) + B*(Q + Qxy),  Dy = (P + Pxy) + Dx
 * where A/B are GF(256) constants looked up via the pbmul/qmul nibble
 * tables below, and "+" is XOR.
 *
 * NOTE(review): the asm statements pass values through fixed $vr
 * registers; their order is significant and must not change.
 */
static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila,
				  int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */

	p = (u8 *)ptrs[disks - 2];
	q = (u8 *)ptrs[disks - 1];

	/*
	 * Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */
	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks - 2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks - 1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[disks - 2] = p;
	ptrs[disks - 1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];

	kernel_fpu_begin();

	/*
	 * Multiplication tables, one 16-byte half per nibble value:
	 * vr20, vr21: qmul
	 * vr22, vr23: pbmul
	 */
	asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
	asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
	asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
	asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));

	while (bytes) {
		/* vr4 - vr7: Q */
		asm volatile("vld $vr4, %0" : : "m" (q[0]));
		asm volatile("vld $vr5, %0" : : "m" (q[16]));
		asm volatile("vld $vr6, %0" : : "m" (q[32]));
		asm volatile("vld $vr7, %0" : : "m" (q[48]));
		/* vr4 - vr7: Q + Qxy */
		asm volatile("vld $vr8, %0" : : "m" (dq[0]));
		asm volatile("vld $vr9, %0" : : "m" (dq[16]));
		asm volatile("vld $vr10, %0" : : "m" (dq[32]));
		asm volatile("vld $vr11, %0" : : "m" (dq[48]));
		asm volatile("vxor.v $vr4, $vr4, $vr8");
		asm volatile("vxor.v $vr5, $vr5, $vr9");
		asm volatile("vxor.v $vr6, $vr6, $vr10");
		asm volatile("vxor.v $vr7, $vr7, $vr11");
		/* vr0 - vr3: P */
		asm volatile("vld $vr0, %0" : : "m" (p[0]));
		asm volatile("vld $vr1, %0" : : "m" (p[16]));
		asm volatile("vld $vr2, %0" : : "m" (p[32]));
		asm volatile("vld $vr3, %0" : : "m" (p[48]));
		/* vr0 - vr3: P + Pxy */
		asm volatile("vld $vr8, %0" : : "m" (dp[0]));
		asm volatile("vld $vr9, %0" : : "m" (dp[16]));
		asm volatile("vld $vr10, %0" : : "m" (dp[32]));
		asm volatile("vld $vr11, %0" : : "m" (dp[48]));
		asm volatile("vxor.v $vr0, $vr0, $vr8");
		asm volatile("vxor.v $vr1, $vr1, $vr9");
		asm volatile("vxor.v $vr2, $vr2, $vr10");
		asm volatile("vxor.v $vr3, $vr3, $vr11");
		/* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */
		asm volatile("vsrli.b $vr8, $vr4, 4");
		asm volatile("vsrli.b $vr9, $vr5, 4");
		asm volatile("vsrli.b $vr10, $vr6, 4");
		asm volatile("vsrli.b $vr11, $vr7, 4");
		/* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */
		asm volatile("vandi.b $vr4, $vr4, 0x0f");
		asm volatile("vandi.b $vr5, $vr5, 0x0f");
		asm volatile("vandi.b $vr6, $vr6, 0x0f");
		asm volatile("vandi.b $vr7, $vr7, 0x0f");
		/* lookup from qmul[0] */
		asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4");
		asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5");
		asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6");
		asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7");
		/* lookup from qmul[16] */
		asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8");
		asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9");
		asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10");
		asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11");
		/* vr16 - vr19: B(Q + Qxy) */
		asm volatile("vxor.v $vr16, $vr8, $vr4");
		asm volatile("vxor.v $vr17, $vr9, $vr5");
		asm volatile("vxor.v $vr18, $vr10, $vr6");
		asm volatile("vxor.v $vr19, $vr11, $vr7");
		/* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */
		asm volatile("vsrli.b $vr4, $vr0, 4");
		asm volatile("vsrli.b $vr5, $vr1, 4");
		asm volatile("vsrli.b $vr6, $vr2, 4");
		asm volatile("vsrli.b $vr7, $vr3, 4");
		/* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */
		asm volatile("vandi.b $vr12, $vr0, 0x0f");
		asm volatile("vandi.b $vr13, $vr1, 0x0f");
		asm volatile("vandi.b $vr14, $vr2, 0x0f");
		asm volatile("vandi.b $vr15, $vr3, 0x0f");
		/* lookup from pbmul[0] */
		asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12");
		asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13");
		asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14");
		asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15");
		/* lookup from pbmul[16] */
		asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4");
		asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5");
		asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6");
		asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7");
		/* vr4 - vr7: A(P + Pxy) */
		asm volatile("vxor.v $vr4, $vr4, $vr12");
		asm volatile("vxor.v $vr5, $vr5, $vr13");
		asm volatile("vxor.v $vr6, $vr6, $vr14");
		asm volatile("vxor.v $vr7, $vr7, $vr15");
		/* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */
		asm volatile("vxor.v $vr4, $vr4, $vr16");
		asm volatile("vxor.v $vr5, $vr5, $vr17");
		asm volatile("vxor.v $vr6, $vr6, $vr18");
		asm volatile("vxor.v $vr7, $vr7, $vr19");
		asm volatile("vst $vr4, %0" : "=m" (dq[0]));
		asm volatile("vst $vr5, %0" : "=m" (dq[16]));
		asm volatile("vst $vr6, %0" : "=m" (dq[32]));
		asm volatile("vst $vr7, %0" : "=m" (dq[48]));
		/* vr0 - vr3: P + Pxy + Dx = Dy */
		asm volatile("vxor.v $vr0, $vr0, $vr4");
		asm volatile("vxor.v $vr1, $vr1, $vr5");
		asm volatile("vxor.v $vr2, $vr2, $vr6");
		asm volatile("vxor.v $vr3, $vr3, $vr7");
		asm volatile("vst $vr0, %0" : "=m" (dp[0]));
		asm volatile("vst $vr1, %0" : "=m" (dp[16]));
		asm volatile("vst $vr2, %0" : "=m" (dp[32]));
		asm volatile("vst $vr3, %0" : "=m" (dp[48]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
	}

	kernel_fpu_end();
}
/*
 * Recover one lost data block (faila) plus P from Q using LSX vectors,
 * 64 bytes per loop iteration.  The lost data is rebuilt as
 * Dx = qmul(Q + Qx) (a GF(256) multiply done with per-nibble table
 * lookups), and P is then restored by XORing Dx into the parity that
 * was computed without it ("P + Dx + Dx = P").
 *
 * NOTE(review): the asm statements pass values through fixed $vr
 * registers; their order is significant and must not change.
 */
static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila,
				  void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */

	p = (u8 *)ptrs[disks - 2];
	q = (u8 *)ptrs[disks - 1];

	/*
	 * Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */
	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks - 1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dq;
	ptrs[disks - 1] = q;

	/* Now, pick the proper data tables */
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	kernel_fpu_begin();

	/* vr22, vr23: qmul (one 16-byte table half per nibble) */
	asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
	asm volatile("vld $vr23, %0" : : "m" (qmul[16]));

	while (bytes) {
		/* vr0 - vr3: P + Dx */
		asm volatile("vld $vr0, %0" : : "m" (p[0]));
		asm volatile("vld $vr1, %0" : : "m" (p[16]));
		asm volatile("vld $vr2, %0" : : "m" (p[32]));
		asm volatile("vld $vr3, %0" : : "m" (p[48]));
		/* vr4 - vr7: Qx */
		asm volatile("vld $vr4, %0" : : "m" (dq[0]));
		asm volatile("vld $vr5, %0" : : "m" (dq[16]));
		asm volatile("vld $vr6, %0" : : "m" (dq[32]));
		asm volatile("vld $vr7, %0" : : "m" (dq[48]));
		/* vr4 - vr7: Q + Qx */
		asm volatile("vld $vr8, %0" : : "m" (q[0]));
		asm volatile("vld $vr9, %0" : : "m" (q[16]));
		asm volatile("vld $vr10, %0" : : "m" (q[32]));
		asm volatile("vld $vr11, %0" : : "m" (q[48]));
		asm volatile("vxor.v $vr4, $vr4, $vr8");
		asm volatile("vxor.v $vr5, $vr5, $vr9");
		asm volatile("vxor.v $vr6, $vr6, $vr10");
		asm volatile("vxor.v $vr7, $vr7, $vr11");
		/* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */
		asm volatile("vsrli.b $vr8, $vr4, 4");
		asm volatile("vsrli.b $vr9, $vr5, 4");
		asm volatile("vsrli.b $vr10, $vr6, 4");
		asm volatile("vsrli.b $vr11, $vr7, 4");
		/* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */
		asm volatile("vandi.b $vr4, $vr4, 0x0f");
		asm volatile("vandi.b $vr5, $vr5, 0x0f");
		asm volatile("vandi.b $vr6, $vr6, 0x0f");
		asm volatile("vandi.b $vr7, $vr7, 0x0f");
		/* lookup from qmul[0] */
		asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4");
		asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5");
		asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6");
		asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7");
		/* lookup from qmul[16] */
		asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8");
		asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9");
		asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10");
		asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11");
		/* vr4 - vr7: qmul(Q + Qx) = Dx */
		asm volatile("vxor.v $vr4, $vr4, $vr8");
		asm volatile("vxor.v $vr5, $vr5, $vr9");
		asm volatile("vxor.v $vr6, $vr6, $vr10");
		asm volatile("vxor.v $vr7, $vr7, $vr11");
		asm volatile("vst $vr4, %0" : "=m" (dq[0]));
		asm volatile("vst $vr5, %0" : "=m" (dq[16]));
		asm volatile("vst $vr6, %0" : "=m" (dq[32]));
		asm volatile("vst $vr7, %0" : "=m" (dq[48]));
		/* vr0 - vr3: P + Dx + Dx = P */
		asm volatile("vxor.v $vr0, $vr0, $vr4");
		asm volatile("vxor.v $vr1, $vr1, $vr5");
		asm volatile("vxor.v $vr2, $vr2, $vr6");
		asm volatile("vxor.v $vr3, $vr3, $vr7");
		asm volatile("vst $vr0, %0" : "=m" (p[0]));
		asm volatile("vst $vr1, %0" : "=m" (p[16]));
		asm volatile("vst $vr2, %0" : "=m" (p[32]));
		asm volatile("vst $vr3, %0" : "=m" (p[48]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
	}

	kernel_fpu_end();
}
/*
 * LSX recovery routine set.  Unlike the syndrome routines, recovery
 * implementations are chosen purely by priority (see the comment at the
 * top of this file), so LSX sits below LASX.
 */
const struct raid6_recov_calls raid6_recov_lsx = {
	.data2 = raid6_2data_recov_lsx,
	.datap = raid6_datap_recov_lsx,
	.valid = raid6_has_lsx,
	.name = "lsx",
	.priority = 1,
};
#endif /* CONFIG_CPU_HAS_LSX */
#ifdef CONFIG_CPU_HAS_LASX
/*
 * Validity hook for the raid6 recovery core: non-zero iff the CPU
 * supports the 256-bit LASX vector extension (cpu_has_lasx is arch-provided).
 */
static int raid6_has_lasx(void)
{
	return cpu_has_lasx;
}
/*
 * Recover two lost data blocks (faila < failb) from P and Q with LASX
 * vectors, 64 bytes per loop iteration:
 *   Dx = A*(P + Pxy) + B*(Q + Qxy),  Dy = (P + Pxy) + Dx
 * The 16-byte GF(256) nibble tables are loaded into the low 128 bits and
 * broadcast to both 128-bit lanes with xvreplve0.q so xvshuf.b performs
 * the same lookup in each lane.
 *
 * NOTE(review): the asm statements pass values through fixed $xr
 * registers; their order is significant and must not change.
 */
static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila,
				   int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */

	p = (u8 *)ptrs[disks - 2];
	q = (u8 *)ptrs[disks - 1];

	/*
	 * Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */
	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks - 2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks - 1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[disks - 2] = p;
	ptrs[disks - 1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];

	kernel_fpu_begin();

	/*
	 * xr20, xr21: qmul
	 * xr22, xr23: pbmul
	 */
	asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
	asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
	asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
	asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
	/* Broadcast the 128-bit tables into both lanes of each 256-bit reg */
	asm volatile("xvreplve0.q $xr20, $xr20");
	asm volatile("xvreplve0.q $xr21, $xr21");
	asm volatile("xvreplve0.q $xr22, $xr22");
	asm volatile("xvreplve0.q $xr23, $xr23");

	while (bytes) {
		/* xr0, xr1: Q */
		asm volatile("xvld $xr0, %0" : : "m" (q[0]));
		asm volatile("xvld $xr1, %0" : : "m" (q[32]));
		/* xr0, xr1: Q + Qxy */
		asm volatile("xvld $xr4, %0" : : "m" (dq[0]));
		asm volatile("xvld $xr5, %0" : : "m" (dq[32]));
		asm volatile("xvxor.v $xr0, $xr0, $xr4");
		asm volatile("xvxor.v $xr1, $xr1, $xr5");
		/* xr2, xr3: P */
		asm volatile("xvld $xr2, %0" : : "m" (p[0]));
		asm volatile("xvld $xr3, %0" : : "m" (p[32]));
		/* xr2, xr3: P + Pxy */
		asm volatile("xvld $xr4, %0" : : "m" (dp[0]));
		asm volatile("xvld $xr5, %0" : : "m" (dp[32]));
		asm volatile("xvxor.v $xr2, $xr2, $xr4");
		asm volatile("xvxor.v $xr3, $xr3, $xr5");
		/* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */
		asm volatile("xvsrli.b $xr4, $xr0, 4");
		asm volatile("xvsrli.b $xr5, $xr1, 4");
		/* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */
		asm volatile("xvandi.b $xr0, $xr0, 0x0f");
		asm volatile("xvandi.b $xr1, $xr1, 0x0f");
		/* lookup from qmul[0] */
		asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0");
		asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1");
		/* lookup from qmul[16] */
		asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4");
		asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5");
		/* xr6, xr7: B(Q + Qxy) */
		asm volatile("xvxor.v $xr6, $xr4, $xr0");
		asm volatile("xvxor.v $xr7, $xr5, $xr1");
		/* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */
		asm volatile("xvsrli.b $xr4, $xr2, 4");
		asm volatile("xvsrli.b $xr5, $xr3, 4");
		/* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */
		asm volatile("xvandi.b $xr0, $xr2, 0x0f");
		asm volatile("xvandi.b $xr1, $xr3, 0x0f");
		/* lookup from pbmul[0] */
		asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0");
		asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1");
		/* lookup from pbmul[16] */
		asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
		asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
		/* xr0, xr1: A(P + Pxy) */
		asm volatile("xvxor.v $xr0, $xr0, $xr4");
		asm volatile("xvxor.v $xr1, $xr1, $xr5");
		/* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */
		asm volatile("xvxor.v $xr0, $xr0, $xr6");
		asm volatile("xvxor.v $xr1, $xr1, $xr7");
		/* xr2, xr3: P + Pxy + Dx = Dy */
		asm volatile("xvxor.v $xr2, $xr2, $xr0");
		asm volatile("xvxor.v $xr3, $xr3, $xr1");
		asm volatile("xvst $xr0, %0" : "=m" (dq[0]));
		asm volatile("xvst $xr1, %0" : "=m" (dq[32]));
		asm volatile("xvst $xr2, %0" : "=m" (dp[0]));
		asm volatile("xvst $xr3, %0" : "=m" (dp[32]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
	}

	kernel_fpu_end();
}
/*
 * Recover one lost data block (faila) plus P from Q using LASX vectors,
 * 64 bytes per loop iteration: Dx = qmul(Q + Qx) via per-nibble table
 * lookups, then P is restored as "P + Dx + Dx = P".  The 16-byte qmul
 * table halves are broadcast to both 128-bit lanes with xvreplve0.q.
 *
 * NOTE(review): the asm statements pass values through fixed $xr
 * registers; their order is significant and must not change.
 */
static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila,
				   void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */

	p = (u8 *)ptrs[disks - 2];
	q = (u8 *)ptrs[disks - 1];

	/*
	 * Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */
	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks - 1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dq;
	ptrs[disks - 1] = q;

	/* Now, pick the proper data tables */
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	kernel_fpu_begin();

	/* xr22, xr23: qmul, each 128-bit table half broadcast to both lanes */
	asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
	asm volatile("xvreplve0.q $xr22, $xr22");
	asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
	asm volatile("xvreplve0.q $xr23, $xr23");

	while (bytes) {
		/* xr0, xr1: P + Dx */
		asm volatile("xvld $xr0, %0" : : "m" (p[0]));
		asm volatile("xvld $xr1, %0" : : "m" (p[32]));
		/* xr2, xr3: Qx */
		asm volatile("xvld $xr2, %0" : : "m" (dq[0]));
		asm volatile("xvld $xr3, %0" : : "m" (dq[32]));
		/* xr2, xr3: Q + Qx */
		asm volatile("xvld $xr4, %0" : : "m" (q[0]));
		asm volatile("xvld $xr5, %0" : : "m" (q[32]));
		asm volatile("xvxor.v $xr2, $xr2, $xr4");
		asm volatile("xvxor.v $xr3, $xr3, $xr5");
		/* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */
		asm volatile("xvsrli.b $xr4, $xr2, 4");
		asm volatile("xvsrli.b $xr5, $xr3, 4");
		/* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */
		asm volatile("xvandi.b $xr2, $xr2, 0x0f");
		asm volatile("xvandi.b $xr3, $xr3, 0x0f");
		/* lookup from qmul[0] */
		asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2");
		asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3");
		/* lookup from qmul[16] */
		asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
		asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
		/* xr2, xr3: qmul(Q + Qx) = Dx */
		asm volatile("xvxor.v $xr2, $xr2, $xr4");
		asm volatile("xvxor.v $xr3, $xr3, $xr5");
		/* xr0, xr1: P + Dx + Dx = P */
		asm volatile("xvxor.v $xr0, $xr0, $xr2");
		asm volatile("xvxor.v $xr1, $xr1, $xr3");
		asm volatile("xvst $xr2, %0" : "=m" (dq[0]));
		asm volatile("xvst $xr3, %0" : "=m" (dq[32]));
		asm volatile("xvst $xr0, %0" : "=m" (p[0]));
		asm volatile("xvst $xr1, %0" : "=m" (p[32]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
	}

	kernel_fpu_end();
}
/*
 * LASX recovery routine set.  Recovery implementations are selected by
 * priority alone (see the comment at the top of this file); LASX ranks
 * above LSX.
 */
const struct raid6_recov_calls raid6_recov_lasx = {
	.data2 = raid6_2data_recov_lasx,
	.datap = raid6_datap_recov_lasx,
	.valid = raid6_has_lasx,
	.name = "lasx",
	.priority = 2,
};
#endif /* CONFIG_CPU_HAS_LASX */
......@@ -41,6 +41,16 @@ ifeq ($(findstring ppc,$(ARCH)),ppc)
gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
endif
ifeq ($(ARCH),loongarch64)
CFLAGS += -I../../../arch/loongarch/include -DCONFIG_LOONGARCH=1
CFLAGS += $(shell echo 'vld $$vr0, $$zero, 0' | \
gcc -c -x assembler - >/dev/null 2>&1 && \
rm ./-.o && echo -DCONFIG_CPU_HAS_LSX=1)
CFLAGS += $(shell echo 'xvld $$xr0, $$zero, 0' | \
gcc -c -x assembler - >/dev/null 2>&1 && \
rm ./-.o && echo -DCONFIG_CPU_HAS_LASX=1)
endif
ifeq ($(IS_X86),yes)
OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o
CFLAGS += -DCONFIG_X86
......@@ -54,6 +64,8 @@ else ifeq ($(HAS_ALTIVEC),yes)
CFLAGS += -DCONFIG_ALTIVEC
OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
else ifeq ($(ARCH),loongarch64)
OBJS += loongarch_simd.o recov_loongarch_simd.o
endif
.c.o:
......
......@@ -139,6 +139,10 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
return 0;
}
/*
 * Weak no-op default: architectures whose fresh PMD-level pages need
 * explicit initialization before being populated override this
 * (presumably LoongArch in this series — see zero_pud_populate, which
 * calls pmd_init() on the page handed to pud_populate()).
 */
void __weak __meminit pmd_init(void *addr)
{
}
static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
unsigned long end)
{
......@@ -166,8 +170,9 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
if (!p)
return -ENOMEM;
} else {
pud_populate(&init_mm, pud,
early_alloc(PAGE_SIZE, NUMA_NO_NODE));
p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
pmd_init(p);
pud_populate(&init_mm, pud, p);
}
}
zero_pmd_populate(pud, addr, next);
......@@ -176,6 +181,10 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
return 0;
}
/*
 * Weak no-op default: architectures whose fresh PUD-level pages need
 * explicit initialization before being populated override this
 * (presumably LoongArch in this series — see zero_p4d_populate, which
 * calls pud_init() on the page handed to p4d_populate()).
 */
void __weak __meminit pud_init(void *addr)
{
}
static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
unsigned long end)
{
......@@ -207,8 +216,9 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
if (!p)
return -ENOMEM;
} else {
p4d_populate(&init_mm, p4d,
early_alloc(PAGE_SIZE, NUMA_NO_NODE));
p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
pud_init(p);
p4d_populate(&init_mm, p4d, p);
}
}
zero_pud_populate(p4d, addr, next);
......
......@@ -291,16 +291,22 @@ struct kasan_stack_ring {
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
#ifndef __HAVE_ARCH_SHADOW_MAP
static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
{
return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET)
<< KASAN_SHADOW_SCALE_SHIFT);
}
#endif
static __always_inline bool addr_has_metadata(const void *addr)
{
#ifdef __HAVE_ARCH_SHADOW_MAP
return (kasan_mem_to_shadow((void *)addr) != NULL);
#else
return (kasan_reset_tag(addr) >=
kasan_shadow_to_mem((void *)KASAN_SHADOW_START));
#endif
}
/**
......
......@@ -574,13 +574,14 @@ static void rcu_guarded_free(struct rcu_head *h)
*/
static unsigned long kfence_init_pool(void)
{
unsigned long addr = (unsigned long)__kfence_pool;
unsigned long addr;
struct page *pages;
int i;
if (!arch_kfence_init_pool())
return addr;
return (unsigned long)__kfence_pool;
addr = (unsigned long)__kfence_pool;
pages = virt_to_page(__kfence_pool);
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment