Commit d0fdc20b, authored by Jisheng Zhang, committed by Palmer Dabbelt

riscv: select DCACHE_WORD_ACCESS for efficient unaligned access HW

DCACHE_WORD_ACCESS uses the word-at-a-time API for optimised string
comparisons in the vfs layer.

This patch implements support for load_unaligned_zeropad in much the
same way as has been done for arm64.

Here are the test program and the steps to run it:

 $ cat tt.c
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>

 #define ITERATIONS 1000000

 #define PATH "123456781234567812345678123456781"

 int main(void)
 {
         unsigned long i;
         struct stat buf;

         for (i = 0; i < ITERATIONS; i++)
                 stat(PATH, &buf);

         return 0;
 }

 $ gcc -O2 tt.c
 $ touch 123456781234567812345678123456781
 $ time ./a.out

Per my test on T-HEAD C910 platforms, the above test performance is
improved by about 7.5%.

Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Link: https://lore.kernel.org/r/20231225044207.3821-3-jszhang@kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
parent b6da6cbe
...@@ -654,6 +654,7 @@ config RISCV_MISALIGNED ...@@ -654,6 +654,7 @@ config RISCV_MISALIGNED
config RISCV_EFFICIENT_UNALIGNED_ACCESS config RISCV_EFFICIENT_UNALIGNED_ACCESS
bool "Assume the CPU supports fast unaligned memory accesses" bool "Assume the CPU supports fast unaligned memory accesses"
depends on NONPORTABLE depends on NONPORTABLE
select DCACHE_WORD_ACCESS if MMU
select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_EFFICIENT_UNALIGNED_ACCESS
help help
Say Y here if you want the kernel to assume that the CPU supports Say Y here if you want the kernel to assume that the CPU supports
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#define EX_TYPE_FIXUP 1 #define EX_TYPE_FIXUP 1
#define EX_TYPE_BPF 2 #define EX_TYPE_BPF 2
#define EX_TYPE_UACCESS_ERR_ZERO 3 #define EX_TYPE_UACCESS_ERR_ZERO 3
#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4
#ifdef CONFIG_MMU #ifdef CONFIG_MMU
...@@ -47,6 +48,11 @@ ...@@ -47,6 +48,11 @@
#define EX_DATA_REG_ZERO_SHIFT 5 #define EX_DATA_REG_ZERO_SHIFT 5
#define EX_DATA_REG_ZERO GENMASK(9, 5) #define EX_DATA_REG_ZERO GENMASK(9, 5)
/*
 * Register-number fields used by EX_TYPE_LOAD_UNALIGNED_ZEROPAD entries:
 * bits 4:0 name the GPR that receives the loaded data, bits 9:5 name the
 * GPR that holds the address. The ADDR field occupies the same bit
 * positions as EX_DATA_REG_ZERO.
 */
#define EX_DATA_REG_DATA_SHIFT 0
#define EX_DATA_REG_DATA GENMASK(4, 0)
#define EX_DATA_REG_ADDR_SHIFT 5
#define EX_DATA_REG_ADDR GENMASK(9, 5)
#define EX_DATA_REG(reg, gpr) \ #define EX_DATA_REG(reg, gpr) \
"((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")" "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
...@@ -62,6 +68,15 @@ ...@@ -62,6 +68,15 @@
#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \
_ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero)
/*
 * Build the inline-asm text for an exception-table entry of type
 * EX_TYPE_LOAD_UNALIGNED_ZEROPAD.
 *
 * @insn:  label of the instruction that may fault
 * @fixup: label at which execution resumes after the fixup
 * @data:  asm operand naming the register that receives the loaded value
 * @addr:  asm operand naming the register that holds the load address
 *
 * The entry's data word is the OR of the two GPR numbers, each shifted into
 * its EX_DATA_REG_DATA / EX_DATA_REG_ADDR field by EX_DATA_REG().
 * NOTE(review): __DEFINE_ASM_GPR_NUMS presumably provides the
 * .L__gpr_num_* symbols EX_DATA_REG() references - confirm at its definition.
 */
#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \
__DEFINE_ASM_GPR_NUMS \
__ASM_EXTABLE_RAW(#insn, #fixup, \
__stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD), \
"(" \
EX_DATA_REG(DATA, data) " | " \
EX_DATA_REG(ADDR, addr) \
")")
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#else /* CONFIG_MMU */ #else /* CONFIG_MMU */
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#define _ASM_RISCV_WORD_AT_A_TIME_H #define _ASM_RISCV_WORD_AT_A_TIME_H
#include <asm/asm-extable.h>
#include <linux/kernel.h> #include <linux/kernel.h>
struct word_at_a_time { struct word_at_a_time {
...@@ -45,4 +46,30 @@ static inline unsigned long find_zero(unsigned long mask) ...@@ -45,4 +46,30 @@ static inline unsigned long find_zero(unsigned long mask)
/* The mask we created is directly usable as a bytemask */ /* The mask we created is directly usable as a bytemask */
#define zero_bytemask(mask) (mask) #define zero_bytemask(mask) (mask)
#ifdef CONFIG_DCACHE_WORD_ACCESS
/*
 * Load an unaligned word from kernel space.
 *
 * In the (very unlikely) case of the word being a page-crosser
 * and the next page not being mapped, take the exception and
 * return zeroes in the non-existing part.
 *
 * @addr: address to load sizeof(unsigned long) bytes from; it does not
 *        have to be word-aligned.
 *
 * Returns the loaded word.
 */
static inline unsigned long load_unaligned_zeropad(const void *addr)
{
unsigned long ret;
/*
 * Load word from unaligned pointer addr.
 *
 * Operand %2 (the "m" operand) is what the REG_L instruction at label 1
 * actually loads from. Operand %1 carries the same pointer in a register
 * so that its register number, together with that of the result %0, can
 * be recorded in the exception-table entry; label 2 is the fixup target.
 * The result is early-clobber ("=&r"), so it is never allocated to the
 * same register as an input.
 */
asm(
"1: " REG_L " %0, %2\n"
"2:\n"
_ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1)
: "=&r" (ret)
: "r" (addr), "m" (*(unsigned long *)addr));
return ret;
}
#endif /* CONFIG_DCACHE_WORD_ACCESS */
#endif /* _ASM_RISCV_WORD_AT_A_TIME_H */ #endif /* _ASM_RISCV_WORD_AT_A_TIME_H */
...@@ -27,6 +27,14 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, ...@@ -27,6 +27,14 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex,
return true; return true;
} }
/*
 * Read one saved register value out of @regs.
 *
 * @regs:   saved register frame to read from
 * @offset: byte offset of the wanted slot from the start of @regs
 *
 * Returns the unsigned long stored at that offset. An offset of zero, or
 * one beyond MAX_REG_OFFSET, is not dereferenced and reads as 0.
 */
static inline unsigned long regs_get_gpr(struct pt_regs *regs, unsigned int offset)
{
	const unsigned long *slot;

	if (unlikely(offset == 0 || offset > MAX_REG_OFFSET))
		return 0;

	slot = (const unsigned long *)((unsigned long)regs + offset);
	return *slot;
}
static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset, static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset,
unsigned long val) unsigned long val)
{ {
...@@ -50,6 +58,27 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, ...@@ -50,6 +58,27 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
return true; return true;
} }
/*
 * Fix up a faulting EX_TYPE_LOAD_UNALIGNED_ZEROPAD access.
 *
 * @ex:   exception-table entry for the faulting instruction; its data word
 *        encodes the GPR numbers of the destination and address registers
 * @regs: register state at the time of the fault
 *
 * Re-reads the naturally aligned word that contains the faulting address and
 * shifts it right by the misalignment in bytes, so the bytes that would have
 * come from beyond that word read as zero. The result is written to the
 * destination register and execution resumes at the entry's fixup address.
 *
 * Always returns true.
 */
static bool
ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
				  struct pt_regs *regs)
{
	/*
	 * Derive the alignment mask from the word size rather than hard-coding
	 * 0x7: with a 4-byte unsigned long a fixed 0x7 mask would round the
	 * address to the wrong boundary and could yield a shift count of 32 or
	 * more, which is undefined for a 32-bit type.
	 */
	const unsigned long align_mask = sizeof(unsigned long) - 1;
	int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data);
	int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
	unsigned long data, addr, offset;

	/* GPR numbers are turned into byte offsets for the regs accessors. */
	addr = regs_get_gpr(regs, reg_addr * sizeof(unsigned long));

	offset = addr & align_mask;
	addr &= ~align_mask;

	/* Aligned load, then drop the bytes below the requested address. */
	data = *(unsigned long *)addr >> (offset * 8);

	regs_set_gpr(regs, reg_data * sizeof(unsigned long), data);

	regs->epc = get_ex_fixup(ex);
	return true;
}
bool fixup_exception(struct pt_regs *regs) bool fixup_exception(struct pt_regs *regs)
{ {
const struct exception_table_entry *ex; const struct exception_table_entry *ex;
...@@ -65,6 +94,8 @@ bool fixup_exception(struct pt_regs *regs) ...@@ -65,6 +94,8 @@ bool fixup_exception(struct pt_regs *regs)
return ex_handler_bpf(ex, regs); return ex_handler_bpf(ex, regs);
case EX_TYPE_UACCESS_ERR_ZERO: case EX_TYPE_UACCESS_ERR_ZERO:
return ex_handler_uaccess_err_zero(ex, regs); return ex_handler_uaccess_err_zero(ex, regs);
case EX_TYPE_LOAD_UNALIGNED_ZEROPAD:
return ex_handler_load_unaligned_zeropad(ex, regs);
} }
BUG(); BUG();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment