Commit 0bfbc914 authored by Linus Torvalds

Merge tag 'riscv-for-linus-6.10-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux

Pull RISC-V updates from Palmer Dabbelt:

 - Add byte/half-word compare-and-exchange, emulated via LR/SC loops

 - Support for Rust

 - Support for Zihintpause in hwprobe

 - Add PR_RISCV_SET_ICACHE_FLUSH_CTX prctl()

 - Support lockless lockrefs

* tag 'riscv-for-linus-6.10-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (42 commits)
  riscv: defconfig: Enable CONFIG_CLK_SOPHGO_CV1800
  riscv: select ARCH_HAS_FAST_MULTIPLIER
  riscv: mm: still create swiotlb buffer for kmalloc() bouncing if required
  riscv: Annotate pgtable_l{4,5}_enabled with __ro_after_init
  riscv: Remove redundant CONFIG_64BIT from pgtable_l{4,5}_enabled
  riscv: mm: Always use an ASID to flush mm contexts
  riscv: mm: Preserve global TLB entries when switching contexts
  riscv: mm: Make asid_bits a local variable
  riscv: mm: Use a fixed layout for the MM context ID
  riscv: mm: Introduce cntx2asid/cntx2version helper macros
  riscv: Avoid TLB flush loops when affected by SiFive CIP-1200
  riscv: Apply SiFive CIP-1200 workaround to single-ASID sfence.vma
  riscv: mm: Combine the SMP and UP TLB flush code
  riscv: Only send remote fences when some other CPU is online
  riscv: mm: Broadcast kernel TLB flushes only when needed
  riscv: Use IPIs for remote cache/TLB flushes by default
  riscv: Factor out page table TLB synchronization
  riscv: Flush the instruction cache during SMP bringup
  riscv: hwprobe: export Zihintpause ISA extension
  riscv: misaligned: remove CONFIG_RISCV_M_MODE specific code
  ...
parents 4f05e820 92cce919
.. SPDX-License-Identifier: GPL-2.0
==============================================================================
Concurrent Modification and Execution of Instructions (CMODX) for RISC-V Linux
==============================================================================
CMODX is a programming technique where a program executes instructions that were
modified by the program itself. Instruction storage and the instruction cache
(icache) are not guaranteed to be synchronized on RISC-V hardware. Therefore, the
program must enforce its own synchronization with the unprivileged fence.i
instruction.
However, the default Linux ABI prohibits the use of fence.i in userspace
applications. At any point the scheduler may migrate a task onto a new hart. If
migration occurs after userspace has synchronized the icache and instruction
storage with fence.i, the icache on the new hart will no longer be clean. This
is because fence.i only affects the hart on which it is executed. Thus, the hart
that the task has been migrated to may not have synchronized instruction
storage and icache.
There are two ways to solve this problem: use the riscv_flush_icache() syscall,
or use the ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` prctl() and emit fence.i in
userspace. The syscall performs a one-off icache flushing operation. The prctl
changes the Linux ABI to allow userspace to emit icache flushing operations.
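For reference, userspace usually reaches the one-off syscall path through the
compiler builtin ``__builtin___clear_cache()``, which libgcc/compiler-rt lower
to the kernel's icache-flush interface on RISC-V Linux. A minimal editorial
sketch (assuming ``buf`` is already mapped both writable and executable, e.g.
via mmap())::

   #include <stddef.h>
   #include <string.h>

   /* Copy new machine code into an executable buffer, then perform the
    * one-off icache/instruction-storage synchronization before running it. */
   static void publish_code(void *buf, const void *insns, size_t len)
   {
           memcpy(buf, insns, len);
           __builtin___clear_cache((char *)buf, (char *)buf + len);
   }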
As an aside, "deferred" icache flushes can sometimes be triggered in the kernel.
At the time of writing, this only occurs during the riscv_flush_icache() syscall
and when the kernel uses copy_to_user_page(). These deferred flushes happen only
when the memory map being used by a hart changes. If the prctl() context caused
an icache flush, this deferred icache flush will be skipped as it is redundant.
Therefore, there will be no additional flush when using the riscv_flush_icache()
syscall inside of the prctl() context.
prctl() Interface
---------------------
Call prctl() with ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` as the first argument. The
remaining arguments will be delegated to the riscv_set_icache_flush_ctx
function detailed below.
.. kernel-doc:: arch/riscv/mm/cacheflush.c
   :identifiers: riscv_set_icache_flush_ctx
Example usage:
The following files are meant to be compiled and linked with each other. The
modify_instruction() function rewrites the ``addi a0, a0, 0`` instruction in
get_value() into ``addi a0, a0, 1``, causing get_value() to change from
returning a zero to returning a one.
cmodx.c::
   #include <stdio.h>
   #include <sys/prctl.h>

   extern int get_value();
   extern void modify_instruction();

   int main()
   {
           int value = get_value();
           printf("Value before cmodx: %d\n", value);

           // Call prctl before the first fence.i is executed inside modify_instruction
           prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS);
           modify_instruction();
           // Call prctl after the final fence.i is executed in the process
           prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_OFF, PR_RISCV_SCOPE_PER_PROCESS);

           value = get_value();
           printf("Value after cmodx: %d\n", value);
           return 0;
   }
cmodx.S::
   .option norvc

   .text
   .global modify_instruction
   modify_instruction:
           lw a0, new_insn
           lui a5,%hi(old_insn)
           sw a0,%lo(old_insn)(a5)
           fence.i
           ret

   .section modifiable, "awx"
   .global get_value
   get_value:
           li a0, 0
   old_insn:
           addi a0, a0, 0
           ret

   .data
   new_insn:
           addi a0, a0, 1
......@@ -188,6 +188,10 @@ The following keys are defined:
manual starting from commit 95cf1f9 ("Add changes requested by Ved
during signoff")
* :c:macro:`RISCV_HWPROBE_EXT_ZIHINTPAUSE`: The Zihintpause extension is
supported as defined in the RISC-V ISA manual starting from commit
d8ab5c78c207 ("Zihintpause is ratified").
* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
information about the selected set of processors.
......
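As an editorial illustration of how these hwprobe keys are consumed, a
userspace query for the Zihintpause bit might look as follows (a sketch,
assuming the installed kernel headers provide ``<asm/hwprobe.h>`` and
``__NR_riscv_hwprobe``)::

   #include <stdio.h>
   #include <unistd.h>
   #include <sys/syscall.h>
   #include <asm/hwprobe.h>   /* struct riscv_hwprobe, RISCV_HWPROBE_* */

   int main(void)
   {
           struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_IA_EXT_0 };

           /* One key/value pair, queried across all online CPUs (empty cpuset). */
           if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0))
                   return 1;

           if (pair.value & RISCV_HWPROBE_EXT_ZIHINTPAUSE)
                   printf("Zihintpause is supported\n");
           return 0;
   }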
......@@ -13,6 +13,7 @@ RISC-V architecture
patch-acceptance
uabi
vector
cmodx
features
......
......@@ -17,6 +17,7 @@ Architecture Level of support Constraints
============= ================ ==============================================
``arm64`` Maintained Little Endian only.
``loongarch`` Maintained \-
``riscv`` Maintained ``riscv64`` only.
``um`` Maintained ``x86_64`` only.
``x86`` Maintained ``x86_64`` only.
============= ================ ==============================================
......
......@@ -23,6 +23,7 @@ config RISCV
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEBUG_WX
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE
......@@ -57,10 +58,11 @@ config RISCV
select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USES_CFI_TRAPS if CFI_CLANG
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if MMU
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT
......@@ -71,7 +73,7 @@ config RISCV
select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
select BUILDTIME_TABLE_SORT if MMU
select CLINT_TIMER if !MMU
select CLINT_TIMER if RISCV_M_MODE
select CLONE_BACKWARDS
select COMMON_CLK
select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND
......@@ -155,6 +157,7 @@ config RISCV
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RETHOOK if !XIP_KERNEL
select HAVE_RSEQ
select HAVE_RUST if 64BIT
select HAVE_SAMPLE_FTRACE_DIRECT
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
select HAVE_STACKPROTECTOR
......@@ -231,8 +234,12 @@ config ARCH_MMAP_RND_COMPAT_BITS_MAX
# set if we run in machine mode, cleared if we run in supervisor mode
config RISCV_M_MODE
bool
default !MMU
bool "Build a kernel that runs in machine mode"
depends on !MMU
default y
help
Select this option if you want to run the kernel in M-mode,
without the assistance of any other firmware.
# set if we are running in S-mode and can use SBI calls
config RISCV_SBI
......@@ -249,8 +256,9 @@ config MMU
config PAGE_OFFSET
hex
default 0xC0000000 if 32BIT && MMU
default 0x80000000 if !MMU
default 0x80000000 if !MMU && RISCV_M_MODE
default 0x80200000 if !MMU
default 0xc0000000 if 32BIT
default 0xff60000000000000 if 64BIT
config KASAN_SHADOW_OFFSET
......@@ -598,7 +606,6 @@ config TOOLCHAIN_HAS_VECTOR_CRYPTO
config RISCV_ISA_ZBB
bool "Zbb extension support for bit manipulation instructions"
depends on TOOLCHAIN_HAS_ZBB
depends on MMU
depends on RISCV_ALTERNATIVE
default y
help
......@@ -630,7 +637,6 @@ config RISCV_ISA_ZICBOM
config RISCV_ISA_ZICBOZ
bool "Zicboz extension support for faster zeroing of memory"
depends on MMU
depends on RISCV_ALTERNATIVE
default y
help
......
......@@ -34,6 +34,9 @@ ifeq ($(CONFIG_ARCH_RV64I),y)
KBUILD_AFLAGS += -mabi=lp64
KBUILD_LDFLAGS += -melf64lriscv
KBUILD_RUSTFLAGS += -Ctarget-cpu=generic-rv64 --target=riscv64imac-unknown-none-elf \
-Cno-redzone
else
BITS := 32
UTS_MACHINE := riscv32
......@@ -68,6 +71,10 @@ riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd
riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c
riscv-march-$(CONFIG_RISCV_ISA_V) := $(riscv-march-y)v
ifneq ($(CONFIG_RISCV_ISA_C),y)
KBUILD_RUSTFLAGS += -Ctarget-feature=-c
endif
ifdef CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC
KBUILD_CFLAGS += -Wa,-misa-spec=2.2
KBUILD_AFLAGS += -Wa,-misa-spec=2.2
......@@ -133,7 +140,15 @@ boot := arch/riscv/boot
ifeq ($(CONFIG_XIP_KERNEL),y)
KBUILD_IMAGE := $(boot)/xipImage
else
ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN_K210),yy)
KBUILD_IMAGE := $(boot)/loader.bin
else
ifeq ($(CONFIG_EFI_ZBOOT),)
KBUILD_IMAGE := $(boot)/Image.gz
else
KBUILD_IMAGE := $(boot)/vmlinuz.efi
endif
endif
endif
libs-y += arch/riscv/lib/
......@@ -153,17 +168,6 @@ endif
vdso-install-y += arch/riscv/kernel/vdso/vdso.so.dbg
vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg
ifneq ($(CONFIG_XIP_KERNEL),y)
ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN_K210),yy)
KBUILD_IMAGE := $(boot)/loader.bin
else
ifeq ($(CONFIG_EFI_ZBOOT),)
KBUILD_IMAGE := $(boot)/Image.gz
else
KBUILD_IMAGE := $(boot)/vmlinuz.efi
endif
endif
endif
BOOT_TARGETS := Image Image.gz loader loader.bin xipImage vmlinuz.efi
all: $(notdir $(KBUILD_IMAGE))
......
......@@ -234,6 +234,7 @@ CONFIG_VIRTIO_BALLOON=y
CONFIG_VIRTIO_INPUT=y
CONFIG_VIRTIO_MMIO=y
CONFIG_RENESAS_OSTM=y
CONFIG_CLK_SOPHGO_CV1800=y
CONFIG_SUN8I_DE2_CCU=m
CONFIG_SUN50I_IOMMU=y
CONFIG_RPMSG_CHAR=y
......
......@@ -42,6 +42,11 @@ static bool errata_cip_1200_check_func(unsigned long arch_id, unsigned long imp
return false;
if ((impid & 0xffffff) > 0x200630 || impid == 0x1200626)
return false;
#ifdef CONFIG_MMU
tlb_flush_all_threshold = 0;
#endif
return true;
}
......
......@@ -195,22 +195,28 @@ ATOMIC_OPS(xor, xor, i)
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#define _arch_atomic_fetch_add_unless(_prev, _rc, counter, _a, _u, sfx) \
({ \
__asm__ __volatile__ ( \
"0: lr." sfx " %[p], %[c]\n" \
" beq %[p], %[u], 1f\n" \
" add %[rc], %[p], %[a]\n" \
" sc." sfx ".rl %[rc], %[rc], %[c]\n" \
" bnez %[rc], 0b\n" \
" fence rw, rw\n" \
"1:\n" \
: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \
: [a]"r" (_a), [u]"r" (_u) \
: "memory"); \
})
/* This is required to provide a full barrier on success. */
static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
{
int prev, rc;
__asm__ __volatile__ (
"0: lr.w %[p], %[c]\n"
" beq %[p], %[u], 1f\n"
" add %[rc], %[p], %[a]\n"
" sc.w.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
RISCV_FULL_BARRIER
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
: [a]"r" (a), [u]"r" (u)
: "memory");
_arch_atomic_fetch_add_unless(prev, rc, v->counter, a, u, "w");
return prev;
}
#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
......@@ -221,77 +227,86 @@ static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a,
s64 prev;
long rc;
__asm__ __volatile__ (
"0: lr.d %[p], %[c]\n"
" beq %[p], %[u], 1f\n"
" add %[rc], %[p], %[a]\n"
" sc.d.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
RISCV_FULL_BARRIER
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
: [a]"r" (a), [u]"r" (u)
: "memory");
_arch_atomic_fetch_add_unless(prev, rc, v->counter, a, u, "d");
return prev;
}
#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
#endif
#define _arch_atomic_inc_unless_negative(_prev, _rc, counter, sfx) \
({ \
__asm__ __volatile__ ( \
"0: lr." sfx " %[p], %[c]\n" \
" bltz %[p], 1f\n" \
" addi %[rc], %[p], 1\n" \
" sc." sfx ".rl %[rc], %[rc], %[c]\n" \
" bnez %[rc], 0b\n" \
" fence rw, rw\n" \
"1:\n" \
: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \
: \
: "memory"); \
})
static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v)
{
int prev, rc;
__asm__ __volatile__ (
"0: lr.w %[p], %[c]\n"
" bltz %[p], 1f\n"
" addi %[rc], %[p], 1\n"
" sc.w.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
RISCV_FULL_BARRIER
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
:
: "memory");
_arch_atomic_inc_unless_negative(prev, rc, v->counter, "w");
return !(prev < 0);
}
#define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative
#define _arch_atomic_dec_unless_positive(_prev, _rc, counter, sfx) \
({ \
__asm__ __volatile__ ( \
"0: lr." sfx " %[p], %[c]\n" \
" bgtz %[p], 1f\n" \
" addi %[rc], %[p], -1\n" \
" sc." sfx ".rl %[rc], %[rc], %[c]\n" \
" bnez %[rc], 0b\n" \
" fence rw, rw\n" \
"1:\n" \
: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \
: \
: "memory"); \
})
static __always_inline bool arch_atomic_dec_unless_positive(atomic_t *v)
{
int prev, rc;
__asm__ __volatile__ (
"0: lr.w %[p], %[c]\n"
" bgtz %[p], 1f\n"
" addi %[rc], %[p], -1\n"
" sc.w.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
RISCV_FULL_BARRIER
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
:
: "memory");
_arch_atomic_dec_unless_positive(prev, rc, v->counter, "w");
return !(prev > 0);
}
#define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive
#define _arch_atomic_dec_if_positive(_prev, _rc, counter, sfx) \
({ \
__asm__ __volatile__ ( \
"0: lr." sfx " %[p], %[c]\n" \
" addi %[rc], %[p], -1\n" \
" bltz %[rc], 1f\n" \
" sc." sfx ".rl %[rc], %[rc], %[c]\n" \
" bnez %[rc], 0b\n" \
" fence rw, rw\n" \
"1:\n" \
: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \
: \
: "memory"); \
})
static __always_inline int arch_atomic_dec_if_positive(atomic_t *v)
{
int prev, rc;
__asm__ __volatile__ (
"0: lr.w %[p], %[c]\n"
" addi %[rc], %[p], -1\n"
" bltz %[rc], 1f\n"
" sc.w.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
RISCV_FULL_BARRIER
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
:
: "memory");
_arch_atomic_dec_if_positive(prev, rc, v->counter, "w");
return prev - 1;
}
......@@ -303,17 +318,8 @@ static __always_inline bool arch_atomic64_inc_unless_negative(atomic64_t *v)
s64 prev;
long rc;
__asm__ __volatile__ (
"0: lr.d %[p], %[c]\n"
" bltz %[p], 1f\n"
" addi %[rc], %[p], 1\n"
" sc.d.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
RISCV_FULL_BARRIER
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
:
: "memory");
_arch_atomic_inc_unless_negative(prev, rc, v->counter, "d");
return !(prev < 0);
}
......@@ -324,17 +330,8 @@ static __always_inline bool arch_atomic64_dec_unless_positive(atomic64_t *v)
s64 prev;
long rc;
__asm__ __volatile__ (
"0: lr.d %[p], %[c]\n"
" bgtz %[p], 1f\n"
" addi %[rc], %[p], -1\n"
" sc.d.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
RISCV_FULL_BARRIER
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
:
: "memory");
_arch_atomic_dec_unless_positive(prev, rc, v->counter, "d");
return !(prev > 0);
}
......@@ -345,17 +342,8 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
s64 prev;
long rc;
__asm__ __volatile__ (
"0: lr.d %[p], %[c]\n"
" addi %[rc], %[p], -1\n"
" bltz %[rc], 1f\n"
" sc.d.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
RISCV_FULL_BARRIER
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
:
: "memory");
_arch_atomic_dec_if_positive(prev, rc, v->counter, "d");
return prev - 1;
}
......
......@@ -26,8 +26,8 @@
#ifndef __ASSEMBLY__
#ifdef CONFIG_RISCV_DMA_NONCOHERENT
extern int dma_cache_alignment;
#ifdef CONFIG_RISCV_DMA_NONCOHERENT
#define dma_get_cache_alignment dma_get_cache_alignment
static inline int dma_get_cache_alignment(void)
{
......
......@@ -33,8 +33,11 @@ static inline void flush_dcache_page(struct page *page)
* so instead we just flush the whole thing.
*/
#define flush_icache_range(start, end) flush_icache_all()
#define flush_icache_user_page(vma, pg, addr, len) \
flush_icache_mm(vma->vm_mm, 0)
#define flush_icache_user_page(vma, pg, addr, len) \
do { \
if (vma->vm_flags & VM_EXEC) \
flush_icache_mm(vma->vm_mm, 0); \
} while (0)
#ifdef CONFIG_64BIT
#define flush_cache_vmap(start, end) flush_tlb_kernel_range(start, end)
......
......@@ -43,11 +43,21 @@ ALTERNATIVE(__stringify(RISCV_PTR do_page_fault), \
CONFIG_ERRATA_SIFIVE_CIP_453)
#else /* !__ASSEMBLY__ */
#define ALT_FLUSH_TLB_PAGE(x) \
#define ALT_SFENCE_VMA_ASID(asid) \
asm(ALTERNATIVE("sfence.vma x0, %0", "sfence.vma", SIFIVE_VENDOR_ID, \
ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \
: : "r" (asid) : "memory")
#define ALT_SFENCE_VMA_ADDR(addr) \
asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \
ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \
: : "r" (addr) : "memory")
#define ALT_SFENCE_VMA_ADDR_ASID(addr, asid) \
asm(ALTERNATIVE("sfence.vma %0, %1", "sfence.vma", SIFIVE_VENDOR_ID, \
ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \
: : "r" (addr), "r" (asid) : "memory")
/*
* _val is marked as "will be overwritten", so need to set it to 0
* in the default case.
......
......@@ -7,7 +7,6 @@
#ifndef _ASM_RISCV_IRQFLAGS_H
#define _ASM_RISCV_IRQFLAGS_H
#include <asm/processor.h>
#include <asm/csr.h>
/* read interrupt enabled status */
......
......@@ -19,6 +19,8 @@ typedef struct {
#ifdef CONFIG_SMP
/* A local icache flush is needed before user execution can resume. */
cpumask_t icache_stale_mask;
/* Force local icache flush on all migrations. */
bool force_icache_flush;
#endif
#ifdef CONFIG_BINFMT_ELF_FDPIC
unsigned long exec_fdpic_loadmap;
......@@ -26,6 +28,9 @@ typedef struct {
#endif
} mm_context_t;
#define cntx2asid(cntx) ((cntx) & SATP_ASID_MASK)
#define cntx2version(cntx) ((cntx) & ~SATP_ASID_MASK)
void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot);
#endif /* __ASSEMBLY__ */
......
......@@ -6,6 +6,7 @@
#ifndef _ASM_RISCV_PATCH_H
#define _ASM_RISCV_PATCH_H
int patch_insn_write(void *addr, const void *insn, size_t len);
int patch_text_nosync(void *addr, const void *insns, size_t len);
int patch_text_set_nosync(void *addr, u8 c, size_t len);
int patch_text(void *addr, u32 *insns, int ninsns);
......
......@@ -8,6 +8,7 @@
#define _ASM_RISCV_PGALLOC_H
#include <linux/mm.h>
#include <asm/sbi.h>
#include <asm/tlb.h>
#ifdef CONFIG_MMU
......@@ -15,6 +16,14 @@
#define __HAVE_ARCH_PUD_FREE
#include <asm-generic/pgalloc.h>
static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt)
{
if (riscv_use_sbi_for_rfence())
tlb_remove_ptdesc(tlb, pt);
else
tlb_remove_page_ptdesc(tlb, pt);
}
static inline void pmd_populate_kernel(struct mm_struct *mm,
pmd_t *pmd, pte_t *pte)
{
......@@ -102,10 +111,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
struct ptdesc *ptdesc = virt_to_ptdesc(pud);
pagetable_pud_dtor(ptdesc);
if (riscv_use_ipi_for_rfence())
tlb_remove_page_ptdesc(tlb, ptdesc);
else
tlb_remove_ptdesc(tlb, ptdesc);
riscv_tlb_remove_ptdesc(tlb, ptdesc);
}
}
......@@ -139,12 +145,8 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
unsigned long addr)
{
if (pgtable_l5_enabled) {
if (riscv_use_ipi_for_rfence())
tlb_remove_page_ptdesc(tlb, virt_to_ptdesc(p4d));
else
tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d));
}
if (pgtable_l5_enabled)
riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d));
}
#endif /* __PAGETABLE_PMD_FOLDED */
......@@ -176,10 +178,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
struct ptdesc *ptdesc = virt_to_ptdesc(pmd);
pagetable_pmd_dtor(ptdesc);
if (riscv_use_ipi_for_rfence())
tlb_remove_page_ptdesc(tlb, ptdesc);
else
tlb_remove_ptdesc(tlb, ptdesc);
riscv_tlb_remove_ptdesc(tlb, ptdesc);
}
#endif /* __PAGETABLE_PMD_FOLDED */
......@@ -190,10 +189,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
struct ptdesc *ptdesc = page_ptdesc(pte);
pagetable_pte_dtor(ptdesc);
if (riscv_use_ipi_for_rfence())
tlb_remove_page_ptdesc(tlb, ptdesc);
else
tlb_remove_ptdesc(tlb, ptdesc);
riscv_tlb_remove_ptdesc(tlb, ptdesc);
}
#endif /* CONFIG_MMU */
......
......@@ -68,6 +68,7 @@
#endif
#ifndef __ASSEMBLY__
#include <linux/cpumask.h>
struct task_struct;
struct pt_regs;
......@@ -122,6 +123,12 @@ struct thread_struct {
struct __riscv_v_ext_state vstate;
unsigned long align_ctl;
struct __riscv_v_ext_state kernel_vstate;
#ifdef CONFIG_SMP
/* Flush the icache on migration */
bool force_icache_flush;
/* A forced icache flush is not needed if migrating to the previous cpu. */
unsigned int prev_cpu;
#endif
};
/* Whitelist the fstate from the task_struct for hardened usercopy */
......@@ -183,6 +190,9 @@ extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
#define GET_UNALIGN_CTL(tsk, addr) get_unalign_ctl((tsk), (addr))
#define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val))
#define RISCV_SET_ICACHE_FLUSH_CTX(arg1, arg2) riscv_set_icache_flush_ctx(arg1, arg2)
extern int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long per_thread);
#endif /* __ASSEMBLY__ */
#endif /* _ASM_RISCV_PROCESSOR_H */
......@@ -387,8 +387,12 @@ unsigned long riscv_cached_marchid(unsigned int cpu_id);
unsigned long riscv_cached_mimpid(unsigned int cpu_id);
#if IS_ENABLED(CONFIG_SMP) && IS_ENABLED(CONFIG_RISCV_SBI)
DECLARE_STATIC_KEY_FALSE(riscv_sbi_for_rfence);
#define riscv_use_sbi_for_rfence() \
static_branch_unlikely(&riscv_sbi_for_rfence)
void sbi_ipi_init(void);
#else
static inline bool riscv_use_sbi_for_rfence(void) { return false; }
static inline void sbi_ipi_init(void) { }
#endif
......
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __ASM_SIGNAL_H
#define __ASM_SIGNAL_H
#include <uapi/asm/signal.h>
#include <uapi/asm/ptrace.h>
asmlinkage __visible
void do_work_pending(struct pt_regs *regs, unsigned long thread_info_flags);
#endif
......@@ -49,12 +49,7 @@ void riscv_ipi_disable(void);
bool riscv_ipi_have_virq_range(void);
/* Set the IPI interrupt numbers for arch (called by irqchip drivers) */
void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence);
/* Check if we can use IPIs for remote FENCEs */
DECLARE_STATIC_KEY_FALSE(riscv_ipi_for_rfence);
#define riscv_use_ipi_for_rfence() \
static_branch_unlikely(&riscv_ipi_for_rfence)
void riscv_ipi_set_virq_range(int virq, int nr);
/* Check other CPUs stop or not */
bool smp_crash_stop_failed(void);
......@@ -104,16 +99,10 @@ static inline bool riscv_ipi_have_virq_range(void)
return false;
}
static inline void riscv_ipi_set_virq_range(int virq, int nr,
bool use_for_rfence)
static inline void riscv_ipi_set_virq_range(int virq, int nr)
{
}
static inline bool riscv_use_ipi_for_rfence(void)
{
return false;
}
#endif /* CONFIG_SMP */
#if defined(CONFIG_HOTPLUG_CPU) && (CONFIG_SMP)
......
......@@ -13,7 +13,6 @@ struct suspend_context {
/* Saved and restored by low-level functions */
struct pt_regs regs;
/* Saved and restored by high-level functions */
unsigned long scratch;
unsigned long envcfg;
unsigned long tvec;
unsigned long ie;
......
......@@ -8,6 +8,7 @@
#include <linux/jump_label.h>
#include <linux/sched/task_stack.h>
#include <linux/mm_types.h>
#include <asm/vector.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
......@@ -72,14 +73,36 @@ static __always_inline bool has_fpu(void) { return false; }
extern struct task_struct *__switch_to(struct task_struct *,
struct task_struct *);
static inline bool switch_to_should_flush_icache(struct task_struct *task)
{
#ifdef CONFIG_SMP
bool stale_mm = task->mm && task->mm->context.force_icache_flush;
bool stale_thread = task->thread.force_icache_flush;
bool thread_migrated = smp_processor_id() != task->thread.prev_cpu;
return thread_migrated && (stale_mm || stale_thread);
#else
return false;
#endif
}
#ifdef CONFIG_SMP
#define __set_prev_cpu(thread) ((thread).prev_cpu = smp_processor_id())
#else
#define __set_prev_cpu(thread)
#endif
#define switch_to(prev, next, last) \
do { \
struct task_struct *__prev = (prev); \
struct task_struct *__next = (next); \
__set_prev_cpu(__prev->thread); \
if (has_fpu()) \
__switch_to_fpu(__prev, __next); \
if (has_vector()) \
__switch_to_vector(__prev, __next); \
if (switch_to_should_flush_icache(__next)) \
local_flush_icache_all(); \
((last) = __switch_to(__prev, __next)); \
} while (0)
......
......@@ -15,24 +15,34 @@
#define FLUSH_TLB_NO_ASID ((unsigned long)-1)
#ifdef CONFIG_MMU
extern unsigned long asid_mask;
static inline void local_flush_tlb_all(void)
{
__asm__ __volatile__ ("sfence.vma" : : : "memory");
}
static inline void local_flush_tlb_all_asid(unsigned long asid)
{
if (asid != FLUSH_TLB_NO_ASID)
ALT_SFENCE_VMA_ASID(asid);
else
local_flush_tlb_all();
}
/* Flush one page from local TLB */
static inline void local_flush_tlb_page(unsigned long addr)
{
ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory"));
ALT_SFENCE_VMA_ADDR(addr);
}
static inline void local_flush_tlb_page_asid(unsigned long addr,
unsigned long asid)
{
if (asid != FLUSH_TLB_NO_ASID)
ALT_SFENCE_VMA_ADDR_ASID(addr, asid);
else
local_flush_tlb_page(addr);
}
#else /* CONFIG_MMU */
#define local_flush_tlb_all() do { } while (0)
#define local_flush_tlb_page(addr) do { } while (0)
#endif /* CONFIG_MMU */
#if defined(CONFIG_SMP) && defined(CONFIG_MMU)
void flush_tlb_all(void);
void flush_tlb_mm(struct mm_struct *mm);
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
......@@ -55,27 +65,9 @@ void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
void arch_flush_tlb_batched_pending(struct mm_struct *mm);
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
#else /* CONFIG_SMP && CONFIG_MMU */
#define flush_tlb_all() local_flush_tlb_all()
#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr)
static inline void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
local_flush_tlb_all();
}
/* Flush a range of kernel pages */
static inline void flush_tlb_kernel_range(unsigned long start,
unsigned long end)
{
local_flush_tlb_all();
}
#define flush_tlb_mm(mm) flush_tlb_all()
#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
#define local_flush_tlb_kernel_range(start, end) flush_tlb_all()
#endif /* !CONFIG_SMP || !CONFIG_MMU */
extern unsigned long tlb_flush_all_threshold;
#else /* CONFIG_MMU */
#define local_flush_tlb_all() do { } while (0)
#endif /* CONFIG_MMU */
#endif /* _ASM_RISCV_TLBFLUSH_H */
......@@ -59,6 +59,7 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_EXT_ZTSO (1ULL << 33)
#define RISCV_HWPROBE_EXT_ZACAS (1ULL << 34)
#define RISCV_HWPROBE_EXT_ZICOND (1ULL << 35)
#define RISCV_HWPROBE_EXT_ZIHINTPAUSE (1ULL << 36)
#define RISCV_HWPROBE_KEY_CPUPERF_0 5
#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
......
......@@ -8,6 +8,7 @@
#include <linux/ftrace.h>
#include <linux/uaccess.h>
#include <linux/memory.h>
#include <linux/stop_machine.h>
#include <asm/cacheflush.h>
#include <asm/patch.h>
......@@ -75,8 +76,7 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
make_call_t0(hook_pos, target, call);
/* Replace the auipc-jalr pair at once. Return -EPERM on write error. */
if (patch_text_nosync
((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE))
if (patch_insn_write((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE))
return -EPERM;
return 0;
......@@ -88,7 +88,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
make_call_t0(rec->ip, addr, call);
if (patch_text_nosync((void *)rec->ip, call, MCOUNT_INSN_SIZE))
if (patch_insn_write((void *)rec->ip, call, MCOUNT_INSN_SIZE))
return -EPERM;
return 0;
......@@ -99,7 +99,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
{
unsigned int nops[2] = {NOP4, NOP4};
if (patch_text_nosync((void *)rec->ip, nops, MCOUNT_INSN_SIZE))
if (patch_insn_write((void *)rec->ip, nops, MCOUNT_INSN_SIZE))
return -EPERM;
return 0;
......@@ -134,6 +134,42 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
return ret;
}
struct ftrace_modify_param {
int command;
atomic_t cpu_count;
};
static int __ftrace_modify_code(void *data)
{
struct ftrace_modify_param *param = data;
if (atomic_inc_return(&param->cpu_count) == num_online_cpus()) {
ftrace_modify_all_code(param->command);
/*
* Make sure the patching store is effective *before* we
* increment the counter which releases all waiting CPUs
* by using the release variant of atomic increment. The
* release pairs with the call to local_flush_icache_all()
* on the waiting CPU.
*/
atomic_inc_return_release(&param->cpu_count);
} else {
while (atomic_read(&param->cpu_count) <= num_online_cpus())
cpu_relax();
}
local_flush_icache_all();
return 0;
}
void arch_ftrace_update_code(int command)
{
struct ftrace_modify_param param = { command, ATOMIC_INIT(0) };
stop_machine(__ftrace_modify_code, &param, cpu_online_mask);
}
#endif
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
......
......@@ -196,7 +196,7 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len)
}
NOKPROBE_SYMBOL(patch_text_set_nosync);
static int patch_insn_write(void *addr, const void *insn, size_t len)
int patch_insn_write(void *addr, const void *insn, size_t len)
{
size_t patched = 0;
size_t size;
......@@ -240,16 +240,23 @@ static int patch_text_cb(void *data)
if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) {
for (i = 0; ret == 0 && i < patch->ninsns; i++) {
len = GET_INSN_LENGTH(patch->insns[i]);
ret = patch_text_nosync(patch->addr + i * len,
&patch->insns[i], len);
ret = patch_insn_write(patch->addr + i * len, &patch->insns[i], len);
}
atomic_inc(&patch->cpu_count);
/*
* Make sure the patching store is effective *before* we
* increment the counter which releases all waiting CPUs
* by using the release variant of atomic increment. The
* release pairs with the call to local_flush_icache_all()
* on the waiting CPU.
*/
atomic_inc_return_release(&patch->cpu_count);
} else {
while (atomic_read(&patch->cpu_count) <= num_online_cpus())
cpu_relax();
smp_mb();
}
local_flush_icache_all();
return ret;
}
NOKPROBE_SYMBOL(patch_text_cb);
......
......@@ -13,6 +13,9 @@
#include <linux/irqdomain.h>
#include <asm/sbi.h>
DEFINE_STATIC_KEY_FALSE(riscv_sbi_for_rfence);
EXPORT_SYMBOL_GPL(riscv_sbi_for_rfence);
static int sbi_ipi_virq;
static void sbi_ipi_handle(struct irq_desc *desc)
......@@ -72,6 +75,12 @@ void __init sbi_ipi_init(void)
"irqchip/sbi-ipi:starting",
sbi_ipi_starting_cpu, NULL);
riscv_ipi_set_virq_range(virq, BITS_PER_BYTE, false);
riscv_ipi_set_virq_range(virq, BITS_PER_BYTE);
pr_info("providing IPIs using SBI IPI extension\n");
/*
* Use the SBI remote fence extension to avoid
* the extra context switch needed to handle IPIs.
*/
static_branch_enable(&riscv_sbi_for_rfence);
}
......@@ -171,10 +171,7 @@ bool riscv_ipi_have_virq_range(void)
return (ipi_virq_base) ? true : false;
}
DEFINE_STATIC_KEY_FALSE(riscv_ipi_for_rfence);
EXPORT_SYMBOL_GPL(riscv_ipi_for_rfence);
void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence)
void riscv_ipi_set_virq_range(int virq, int nr)
{
int i, err;
......@@ -197,12 +194,6 @@ void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence)
/* Enabled IPIs for boot CPU immediately */
riscv_ipi_enable();
/* Update RFENCE static key */
if (use_for_rfence)
static_branch_enable(&riscv_ipi_for_rfence);
else
static_branch_disable(&riscv_ipi_for_rfence);
}
static const char * const ipi_names[] = {
......
......@@ -26,7 +26,7 @@
#include <linux/sched/task_stack.h>
#include <linux/sched/mm.h>
#include <asm/cpufeature.h>
#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
......@@ -234,9 +234,10 @@ asmlinkage __visible void smp_callin(void)
riscv_user_isa_enable();
/*
* Remote TLB flushes are ignored while the CPU is offline, so emit
* a local TLB flush right now just in case.
* Remote cache and TLB flushes are ignored while the CPU is offline,
* so flush them both right now just in case.
*/
local_flush_icache_all();
local_flush_tlb_all();
complete(&cpu_running);
/*
......
......@@ -14,7 +14,6 @@
void suspend_save_csrs(struct suspend_context *context)
{
context->scratch = csr_read(CSR_SCRATCH);
if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG))
context->envcfg = csr_read(CSR_ENVCFG);
context->tvec = csr_read(CSR_TVEC);
......@@ -37,7 +36,7 @@ void suspend_save_csrs(struct suspend_context *context)
void suspend_restore_csrs(struct suspend_context *context)
{
csr_write(CSR_SCRATCH, context->scratch);
csr_write(CSR_SCRATCH, 0);
if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG))
csr_write(CSR_ENVCFG, context->envcfg);
csr_write(CSR_TVEC, context->tvec);
......
......@@ -111,6 +111,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
EXT_KEY(ZTSO);
EXT_KEY(ZACAS);
EXT_KEY(ZICOND);
EXT_KEY(ZIHINTPAUSE);
if (has_vector()) {
EXT_KEY(ZVBB);
......
......@@ -7,7 +7,6 @@
#include <linux/syscalls.h>
#include <asm/cacheflush.h>
#include <asm-generic/mman-common.h>
static long riscv_sys_mmap(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
......
......@@ -264,86 +264,14 @@ static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset,
#define GET_F32_RS2C(insn, regs) (get_f32_rs(insn, 2, regs))
#define GET_F32_RS2S(insn, regs) (get_f32_rs(RVC_RS2S(insn), 0, regs))
#ifdef CONFIG_RISCV_M_MODE
static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val)
{
u8 val;
asm volatile("lbu %0, %1" : "=&r" (val) : "m" (*addr));
*r_val = val;
return 0;
}
static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val)
{
asm volatile ("sb %0, %1\n" : : "r" (val), "m" (*addr));
return 0;
}
static inline int get_insn(struct pt_regs *regs, ulong mepc, ulong *r_insn)
{
register ulong __mepc asm ("a2") = mepc;
ulong val, rvc_mask = 3, tmp;
asm ("and %[tmp], %[addr], 2\n"
"bnez %[tmp], 1f\n"
#if defined(CONFIG_64BIT)
__stringify(LWU) " %[insn], (%[addr])\n"
#else
__stringify(LW) " %[insn], (%[addr])\n"
#endif
"and %[tmp], %[insn], %[rvc_mask]\n"
"beq %[tmp], %[rvc_mask], 2f\n"
"sll %[insn], %[insn], %[xlen_minus_16]\n"
"srl %[insn], %[insn], %[xlen_minus_16]\n"
"j 2f\n"
"1:\n"
"lhu %[insn], (%[addr])\n"
"and %[tmp], %[insn], %[rvc_mask]\n"
"bne %[tmp], %[rvc_mask], 2f\n"
"lhu %[tmp], 2(%[addr])\n"
"sll %[tmp], %[tmp], 16\n"
"add %[insn], %[insn], %[tmp]\n"
"2:"
: [insn] "=&r" (val), [tmp] "=&r" (tmp)
: [addr] "r" (__mepc), [rvc_mask] "r" (rvc_mask),
[xlen_minus_16] "i" (XLEN_MINUS_16));
*r_insn = val;
return 0;
}
#else
static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val)
{
if (user_mode(regs)) {
return __get_user(*r_val, (u8 __user *)addr);
} else {
*r_val = *addr;
return 0;
}
}
static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val)
{
if (user_mode(regs)) {
return __put_user(val, (u8 __user *)addr);
} else {
*addr = val;
return 0;
}
}
#define __read_insn(regs, insn, insn_addr) \
#define __read_insn(regs, insn, insn_addr, type) \
({ \
int __ret; \
\
if (user_mode(regs)) { \
__ret = __get_user(insn, insn_addr); \
__ret = __get_user(insn, (type __user *) insn_addr); \
} else { \
insn = *(__force u16 *)insn_addr; \
insn = *(type *)insn_addr; \
__ret = 0; \
} \
\
......@@ -356,9 +284,8 @@ static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn)
if (epc & 0x2) {
ulong tmp = 0;
u16 __user *insn_addr = (u16 __user *)epc;
if (__read_insn(regs, insn, insn_addr))
if (__read_insn(regs, insn, epc, u16))
return -EFAULT;
/* __get_user() uses regular "lw" which sign extend the loaded
* value make sure to clear higher order bits in case we "or" it
......@@ -369,16 +296,14 @@ static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn)
*r_insn = insn;
return 0;
}
insn_addr++;
if (__read_insn(regs, tmp, insn_addr))
epc += sizeof(u16);
if (__read_insn(regs, tmp, epc, u16))
return -EFAULT;
*r_insn = (tmp << 16) | insn;
return 0;
} else {
u32 __user *insn_addr = (u32 __user *)epc;
if (__read_insn(regs, insn, insn_addr))
if (__read_insn(regs, insn, epc, u32))
return -EFAULT;
if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) {
*r_insn = insn;
......@@ -390,7 +315,6 @@ static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn)
return 0;
}
}
#endif
union reg_data {
u8 data_bytes[8];
......@@ -409,7 +333,7 @@ int handle_misaligned_load(struct pt_regs *regs)
unsigned long epc = regs->epc;
unsigned long insn;
unsigned long addr = regs->badaddr;
int i, fp = 0, shift = 0, len = 0;
int fp = 0, shift = 0, len = 0;
perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
......@@ -492,9 +416,11 @@ int handle_misaligned_load(struct pt_regs *regs)
return -EOPNOTSUPP;
val.data_u64 = 0;
for (i = 0; i < len; i++) {
if (load_u8(regs, (void *)(addr + i), &val.data_bytes[i]))
if (user_mode(regs)) {
if (raw_copy_from_user(&val, (u8 __user *)addr, len))
return -1;
} else {
memcpy(&val, (u8 *)addr, len);
}
if (!fp)
......@@ -515,7 +441,7 @@ int handle_misaligned_store(struct pt_regs *regs)
unsigned long epc = regs->epc;
unsigned long insn;
unsigned long addr = regs->badaddr;
int i, len = 0, fp = 0;
int len = 0, fp = 0;
perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
......@@ -588,9 +514,11 @@ int handle_misaligned_store(struct pt_regs *regs)
if (!IS_ENABLED(CONFIG_FPU) && fp)
return -EOPNOTSUPP;
for (i = 0; i < len; i++) {
if (store_u8(regs, (void *)(addr + i), val.data_bytes[i]))
if (user_mode(regs)) {
if (raw_copy_to_user((u8 __user *)addr, &val, len))
return -1;
} else {
memcpy((u8 *)addr, &val, len);
}
regs->epc = epc + INSN_LEN(insn);
......
......@@ -13,14 +13,11 @@ endif
KCOV_INSTRUMENT_init.o := n
obj-y += init.o
obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o pgtable.o
obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o pgtable.o tlbflush.o
obj-y += cacheflush.o
obj-y += context.o
obj-y += pmem.o
ifeq ($(CONFIG_MMU),y)
obj-$(CONFIG_SMP) += tlbflush.o
endif
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
obj-$(CONFIG_KASAN) += kasan_init.o
......
......@@ -5,6 +5,7 @@
#include <linux/acpi.h>
#include <linux/of.h>
#include <linux/prctl.h>
#include <asm/acpi.h>
#include <asm/cacheflush.h>
......@@ -21,7 +22,9 @@ void flush_icache_all(void)
{
local_flush_icache_all();
if (IS_ENABLED(CONFIG_RISCV_SBI) && !riscv_use_ipi_for_rfence())
if (num_online_cpus() < 2)
return;
else if (riscv_use_sbi_for_rfence())
sbi_remote_fence_i(NULL);
else
on_each_cpu(ipi_remote_fence_i, NULL, 1);
......@@ -69,8 +72,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
* with flush_icache_deferred().
*/
smp_mb();
} else if (IS_ENABLED(CONFIG_RISCV_SBI) &&
!riscv_use_ipi_for_rfence()) {
} else if (riscv_use_sbi_for_rfence()) {
sbi_remote_fence_i(&others);
} else {
on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1);
......@@ -152,3 +154,115 @@ void __init riscv_init_cbo_blocksizes(void)
if (cboz_block_size)
riscv_cboz_block_size = cboz_block_size;
}
#ifdef CONFIG_SMP
static void set_icache_stale_mask(void)
{
cpumask_t *mask;
bool stale_cpu;
/*
* Mark every other hart's icache as needing a flush for
* this MM. Maintain the previous value of the current
* cpu to handle the case when this function is called
* concurrently on different harts.
*/
mask = &current->mm->context.icache_stale_mask;
stale_cpu = cpumask_test_cpu(smp_processor_id(), mask);
cpumask_setall(mask);
cpumask_assign_cpu(smp_processor_id(), mask, stale_cpu);
}
#endif
/**
* riscv_set_icache_flush_ctx() - Enable/disable icache flushing instructions in
* userspace.
* @ctx: Set the type of icache flushing instructions permitted/prohibited in
* userspace. Supported values described below.
*
* Supported values for ctx:
*
* * %PR_RISCV_CTX_SW_FENCEI_ON: Allow fence.i in user space.
*
* * %PR_RISCV_CTX_SW_FENCEI_OFF: Disallow fence.i in user space. All threads in
* a process will be affected when ``scope == PR_RISCV_SCOPE_PER_PROCESS``.
* Therefore, caution must be taken; use this flag only when you can guarantee
* that no thread in the process will emit fence.i from this point onward.
*
* @scope: Set scope of where icache flushing instructions are allowed to be
* emitted. Supported values described below.
*
* Supported values for scope:
*
* * %PR_RISCV_SCOPE_PER_PROCESS: Ensure the icache of any thread in this process
* is coherent with instruction storage upon
* migration.
*
* * %PR_RISCV_SCOPE_PER_THREAD: Ensure the icache of the current thread is
* coherent with instruction storage upon
* migration.
*
* When ``scope == PR_RISCV_SCOPE_PER_PROCESS``, all threads in the process are
* permitted to emit icache flushing instructions. Whenever any thread in the
* process is migrated, the corresponding hart's icache will be guaranteed to be
* consistent with instruction storage. This does not enforce any guarantees
* outside of migration. If a thread modifies an instruction that another thread
* may attempt to execute, the other thread must still emit an icache flushing
* instruction before attempting to execute the potentially modified
* instruction. This must be performed by the user-space program.
*
* In per-thread context (e.g. ``scope == PR_RISCV_SCOPE_PER_THREAD``) only the
* thread calling this function is permitted to emit icache flushing
* instructions. When the thread is migrated, the corresponding hart's icache
* will be guaranteed to be consistent with instruction storage.
*
* On kernels configured without SMP, this function is a nop as migrations
* across harts will not occur.
*/
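/*
 * Editorial usage sketch (not part of this file): a thread that only needs to
 * cover its own self-modifying code could bracket it with the per-thread
 * scope, mirroring the per-process example in the cmodx documentation above:
 *
 *	prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON,
 *	      PR_RISCV_SCOPE_PER_THREAD);
 *	...patch instructions and emit fence.i from this thread...
 *	prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_OFF,
 *	      PR_RISCV_SCOPE_PER_THREAD);
 */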
int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long scope)
{
#ifdef CONFIG_SMP
switch (ctx) {
case PR_RISCV_CTX_SW_FENCEI_ON:
switch (scope) {
case PR_RISCV_SCOPE_PER_PROCESS:
current->mm->context.force_icache_flush = true;
break;
case PR_RISCV_SCOPE_PER_THREAD:
current->thread.force_icache_flush = true;
break;
default:
return -EINVAL;
}
break;
case PR_RISCV_CTX_SW_FENCEI_OFF:
switch (scope) {
case PR_RISCV_SCOPE_PER_PROCESS:
current->mm->context.force_icache_flush = false;
set_icache_stale_mask();
break;
case PR_RISCV_SCOPE_PER_THREAD:
current->thread.force_icache_flush = false;
set_icache_stale_mask();
break;
default:
return -EINVAL;
}
break;
default:
return -EINVAL;
}
return 0;
#else
switch (ctx) {
case PR_RISCV_CTX_SW_FENCEI_ON:
case PR_RISCV_CTX_SW_FENCEI_OFF:
return 0;
default:
return -EINVAL;
}
#endif
}
......@@ -15,14 +15,13 @@
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
#include <asm/switch_to.h>
#ifdef CONFIG_MMU
DEFINE_STATIC_KEY_FALSE(use_asid_allocator);
static unsigned long asid_bits;
static unsigned long num_asids;
unsigned long asid_mask;
static atomic_long_t current_version;
......@@ -81,7 +80,7 @@ static void __flush_context(void)
if (cntx == 0)
cntx = per_cpu(reserved_context, i);
__set_bit(cntx & asid_mask, context_asid_map);
__set_bit(cntx2asid(cntx), context_asid_map);
per_cpu(reserved_context, i) = cntx;
}
......@@ -102,7 +101,7 @@ static unsigned long __new_context(struct mm_struct *mm)
lockdep_assert_held(&context_lock);
if (cntx != 0) {
unsigned long newcntx = ver | (cntx & asid_mask);
unsigned long newcntx = ver | cntx2asid(cntx);
/*
* If our current CONTEXT was active during a rollover, we
......@@ -115,7 +114,7 @@ static unsigned long __new_context(struct mm_struct *mm)
* We had a valid CONTEXT in a previous life, so try to
* re-use it if possible.
*/
if (!__test_and_set_bit(cntx & asid_mask, context_asid_map))
if (!__test_and_set_bit(cntx2asid(cntx), context_asid_map))
return newcntx;
}
......@@ -128,7 +127,7 @@ static unsigned long __new_context(struct mm_struct *mm)
goto set_asid;
/* We're out of ASIDs, so increment current_version */
ver = atomic_long_add_return_relaxed(num_asids, &current_version);
ver = atomic_long_add_return_relaxed(BIT(SATP_ASID_BITS), &current_version);
/* Flush everything */
__flush_context();
......@@ -168,7 +167,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
*/
old_active_cntx = atomic_long_read(&per_cpu(active_context, cpu));
if (old_active_cntx &&
((cntx & ~asid_mask) == atomic_long_read(&current_version)) &&
(cntx2version(cntx) == atomic_long_read(&current_version)) &&
atomic_long_cmpxchg_relaxed(&per_cpu(active_context, cpu),
old_active_cntx, cntx))
goto switch_mm_fast;
......@@ -177,7 +176,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
/* Check that our ASID belongs to the current_version. */
cntx = atomic_long_read(&mm->context.id);
if ((cntx & ~asid_mask) != atomic_long_read(&current_version)) {
if (cntx2version(cntx) != atomic_long_read(&current_version)) {
cntx = __new_context(mm);
atomic_long_set(&mm->context.id, cntx);
}
......@@ -191,7 +190,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
switch_mm_fast:
csr_write(CSR_SATP, virt_to_pfn(mm->pgd) |
((cntx & asid_mask) << SATP_ASID_SHIFT) |
(cntx2asid(cntx) << SATP_ASID_SHIFT) |
satp_mode);
if (need_flush_tlb)
......@@ -202,7 +201,7 @@ static void set_mm_noasid(struct mm_struct *mm)
{
/* Switch the page table and blindly nuke entire local TLB */
csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | satp_mode);
local_flush_tlb_all();
local_flush_tlb_all_asid(0);
}
static inline void set_mm(struct mm_struct *prev,
......@@ -227,7 +226,7 @@ static inline void set_mm(struct mm_struct *prev,
static int __init asids_init(void)
{
unsigned long old;
unsigned long asid_bits, old;
/* Figure-out number of ASID bits in HW */
old = csr_read(CSR_SATP);
......@@ -247,7 +246,6 @@ static int __init asids_init(void)
/* Pre-compute ASID details */
if (asid_bits) {
num_asids = 1 << asid_bits;
asid_mask = num_asids - 1;
}
/*
......@@ -255,7 +253,7 @@ static int __init asids_init(void)
* at-least twice more than CPUs
*/
if (num_asids > (2 * num_possible_cpus())) {
atomic_long_set(&current_version, num_asids);
atomic_long_set(&current_version, BIT(SATP_ASID_BITS));
context_asid_map = bitmap_zalloc(num_asids, GFP_KERNEL);
if (!context_asid_map)
......@@ -297,21 +295,23 @@ static inline void set_mm(struct mm_struct *prev,
*
* The "cpu" argument must be the current local CPU number.
*/
static inline void flush_icache_deferred(struct mm_struct *mm, unsigned int cpu)
static inline void flush_icache_deferred(struct mm_struct *mm, unsigned int cpu,
struct task_struct *task)
{
#ifdef CONFIG_SMP
cpumask_t *mask = &mm->context.icache_stale_mask;
if (cpumask_test_cpu(cpu, mask)) {
cpumask_clear_cpu(cpu, mask);
if (cpumask_test_and_clear_cpu(cpu, &mm->context.icache_stale_mask)) {
/*
* Ensure the remote hart's writes are visible to this hart.
* This pairs with a barrier in flush_icache_mm.
*/
smp_mb();
local_flush_icache_all();
}
/*
* If cache will be flushed in switch_to, no need to flush here.
*/
if (!(task && switch_to_should_flush_icache(task)))
local_flush_icache_all();
}
#endif
}
......@@ -334,5 +334,5 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
set_mm(prev, next, cpu);
flush_icache_deferred(next, cpu);
flush_icache_deferred(next, cpu, task);
}
......@@ -50,8 +50,8 @@ u64 satp_mode __ro_after_init = SATP_MODE_32;
EXPORT_SYMBOL(satp_mode);
#ifdef CONFIG_64BIT
bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
bool pgtable_l4_enabled __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL);
bool pgtable_l5_enabled __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL);
EXPORT_SYMBOL(pgtable_l4_enabled);
EXPORT_SYMBOL(pgtable_l5_enabled);
#endif
......@@ -162,11 +162,25 @@ static void print_vm_layout(void) { }
void __init mem_init(void)
{
bool swiotlb = max_pfn > PFN_DOWN(dma32_phys_limit);
#ifdef CONFIG_FLATMEM
BUG_ON(!mem_map);
#endif /* CONFIG_FLATMEM */
swiotlb_init(max_pfn > PFN_DOWN(dma32_phys_limit), SWIOTLB_VERBOSE);
if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && !swiotlb &&
dma_cache_alignment != 1) {
/*
* If no bouncing needed for ZONE_DMA, allocate 1MB swiotlb
* buffer per 1GB of RAM for kmalloc() bouncing on
* non-coherent platforms.
*/
unsigned long size =
DIV_ROUND_UP(memblock_phys_mem_size(), 1024);
swiotlb_adjust_size(min(swiotlb_size_or_default(), size));
swiotlb = true;
}
swiotlb_init(swiotlb, SWIOTLB_VERBOSE);
memblock_free_all();
print_vm_layout();
......
......@@ -7,34 +7,11 @@
#include <asm/sbi.h>
#include <asm/mmu_context.h>
static inline void local_flush_tlb_all_asid(unsigned long asid)
{
if (asid != FLUSH_TLB_NO_ASID)
__asm__ __volatile__ ("sfence.vma x0, %0"
:
: "r" (asid)
: "memory");
else
local_flush_tlb_all();
}
static inline void local_flush_tlb_page_asid(unsigned long addr,
unsigned long asid)
{
if (asid != FLUSH_TLB_NO_ASID)
__asm__ __volatile__ ("sfence.vma %0, %1"
:
: "r" (addr), "r" (asid)
: "memory");
else
local_flush_tlb_page(addr);
}
/*
* Flush entire TLB if number of entries to be flushed is greater
* than the threshold below.
*/
static unsigned long tlb_flush_all_threshold __read_mostly = 64;
unsigned long tlb_flush_all_threshold __read_mostly = 64;
static void local_flush_tlb_range_threshold_asid(unsigned long start,
unsigned long size,
......@@ -79,10 +56,12 @@ static void __ipi_flush_tlb_all(void *info)
void flush_tlb_all(void)
{
if (riscv_use_ipi_for_rfence())
on_each_cpu(__ipi_flush_tlb_all, NULL, 1);
else
if (num_online_cpus() < 2)
local_flush_tlb_all();
else if (riscv_use_sbi_for_rfence())
sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID);
else
on_each_cpu(__ipi_flush_tlb_all, NULL, 1);
}
struct flush_tlb_range_data {
......@@ -103,46 +82,34 @@ static void __flush_tlb_range(const struct cpumask *cmask, unsigned long asid,
unsigned long start, unsigned long size,
unsigned long stride)
{
struct flush_tlb_range_data ftd;
bool broadcast;
unsigned int cpu;
if (cpumask_empty(cmask))
return;
if (cmask != cpu_online_mask) {
unsigned int cpuid;
cpu = get_cpu();
cpuid = get_cpu();
/* check if the tlbflush needs to be sent to other CPUs */
broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
/* Check if the TLB flush needs to be sent to other CPUs. */
if (cpumask_any_but(cmask, cpu) >= nr_cpu_ids) {
local_flush_tlb_range_asid(start, size, stride, asid);
} else if (riscv_use_sbi_for_rfence()) {
sbi_remote_sfence_vma_asid(cmask, start, size, asid);
} else {
broadcast = true;
}
struct flush_tlb_range_data ftd;
if (broadcast) {
if (riscv_use_ipi_for_rfence()) {
ftd.asid = asid;
ftd.start = start;
ftd.size = size;
ftd.stride = stride;
on_each_cpu_mask(cmask,
__ipi_flush_tlb_range_asid,
&ftd, 1);
} else
sbi_remote_sfence_vma_asid(cmask,
start, size, asid);
} else {
local_flush_tlb_range_asid(start, size, stride, asid);
ftd.asid = asid;
ftd.start = start;
ftd.size = size;
ftd.stride = stride;
on_each_cpu_mask(cmask, __ipi_flush_tlb_range_asid, &ftd, 1);
}
if (cmask != cpu_online_mask)
put_cpu();
put_cpu();
}
static inline unsigned long get_mm_asid(struct mm_struct *mm)
{
return static_branch_unlikely(&use_asid_allocator) ?
atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID;
return cntx2asid(atomic_long_read(&mm->context.id));
}
void flush_tlb_mm(struct mm_struct *mm)
......
......@@ -251,7 +251,7 @@ static int __init clint_timer_init_dt(struct device_node *np)
}
irq_set_chained_handler(clint_ipi_irq, clint_ipi_interrupt);
riscv_ipi_set_virq_range(rc, BITS_PER_BYTE, true);
riscv_ipi_set_virq_range(rc, BITS_PER_BYTE);
clint_clear_ipi();
#endif
......
......@@ -543,6 +543,22 @@ static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp)
__clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
}
/**
* cpumask_assign_cpu - assign a cpu in a cpumask
* @cpu: cpu number (< nr_cpu_ids)
* @dstp: the cpumask pointer
* @value: the value to assign
*/
static __always_inline void cpumask_assign_cpu(int cpu, struct cpumask *dstp, bool value)
{
assign_bit(cpumask_check(cpu), cpumask_bits(dstp), value);
}
static __always_inline void __cpumask_assign_cpu(int cpu, struct cpumask *dstp, bool value)
{
__assign_bit(cpumask_check(cpu), cpumask_bits(dstp), value);
}
/**
* cpumask_test_cpu - test for a cpu in a cpumask
* @cpu: cpu number (< nr_cpu_ids)
......
......@@ -306,6 +306,12 @@ struct prctl_mm_map {
# define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc
# define PR_RISCV_V_VSTATE_CTRL_MASK 0x1f
#define PR_RISCV_SET_ICACHE_FLUSH_CTX 71
# define PR_RISCV_CTX_SW_FENCEI_ON 0
# define PR_RISCV_CTX_SW_FENCEI_OFF 1
# define PR_RISCV_SCOPE_PER_PROCESS 0
# define PR_RISCV_SCOPE_PER_THREAD 1
/* PowerPC Dynamic Execution Control Register (DEXCR) controls */
#define PR_PPC_GET_DEXCR 72
#define PR_PPC_SET_DEXCR 73
......
......@@ -146,6 +146,9 @@
#ifndef RISCV_V_GET_CONTROL
# define RISCV_V_GET_CONTROL() (-EINVAL)
#endif
#ifndef RISCV_SET_ICACHE_FLUSH_CTX
# define RISCV_SET_ICACHE_FLUSH_CTX(a, b) (-EINVAL)
#endif
#ifndef PPC_GET_DEXCR_ASPECT
# define PPC_GET_DEXCR_ASPECT(a, b) (-EINVAL)
#endif
......@@ -2776,6 +2779,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_RISCV_V_GET_CONTROL:
error = RISCV_V_GET_CONTROL();
break;
case PR_RISCV_SET_ICACHE_FLUSH_CTX:
error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3);
break;
default:
error = -EINVAL;
break;
......
......@@ -150,6 +150,12 @@ fn main() {
// `llvm-target`s are taken from `scripts/Makefile.clang`.
if cfg.has("ARM64") {
panic!("arm64 uses the builtin rustc aarch64-unknown-none target");
} else if cfg.has("RISCV") {
if cfg.has("64BIT") {
panic!("64-bit RISC-V uses the builtin rustc riscv64-unknown-none-elf target");
} else {
panic!("32-bit RISC-V is an unsupported architecture");
}
} else if cfg.has("X86_64") {
ts.push("arch", "x86_64");
ts.push(
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment