Commit c5674d00 authored by Greentime Hu, committed by Palmer Dabbelt

riscv: Add vector extension XOR implementation

This patch adds support for vector optimized XOR and it is tested in
qemu.
Co-developed-by: Han-Kuan Chen <hankuan.chen@sifive.com>
Signed-off-by: Han-Kuan Chen <hankuan.chen@sifive.com>
Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
Tested-by: Björn Töpel <bjorn@rivosinc.com>
Tested-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Link: https://lore.kernel.org/r/20240115055929.4736-4-andy.chiu@sifive.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
parent 956895b9
......@@ -9,6 +9,24 @@ long long __lshrti3(long long a, int b);
long long __ashrti3(long long a, int b);
long long __ashlti3(long long a, int b);
#ifdef CONFIG_RISCV_ISA_V
/*
 * Assembly-implemented vector XOR helpers (see xor.S in this patch).
 * Each routine XORs 2..5 equally-sized buffers into p1 in place:
 * p1[] ^= p2[] (^ p3[] ...), for `bytes` bytes. The pointers are
 * __restrict: callers must pass non-overlapping buffers.
 * NOTE(review): callers must wrap these in kernel_vector_begin()/end()
 * (as the xor_vector_* wrappers below do) since they use V registers.
 */
void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1,
		 const unsigned long *__restrict p2);
void xor_regs_3_(unsigned long bytes, unsigned long *__restrict p1,
		 const unsigned long *__restrict p2,
		 const unsigned long *__restrict p3);
void xor_regs_4_(unsigned long bytes, unsigned long *__restrict p1,
		 const unsigned long *__restrict p2,
		 const unsigned long *__restrict p3,
		 const unsigned long *__restrict p4);
void xor_regs_5_(unsigned long bytes, unsigned long *__restrict p1,
		 const unsigned long *__restrict p2,
		 const unsigned long *__restrict p3,
		 const unsigned long *__restrict p4,
		 const unsigned long *__restrict p5);
#endif /* CONFIG_RISCV_ISA_V */
#define DECLARE_DO_ERROR_INFO(name) asmlinkage void name(struct pt_regs *regs)
......
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2021 SiFive
*/
#include <linux/hardirq.h>
#include <asm-generic/xor.h>
#ifdef CONFIG_RISCV_ISA_V
#include <asm/vector.h>
#include <asm/switch_to.h>
#include <asm/asm-prototypes.h>
/*
 * xor_vector_2() - p1[] ^= p2[] for `bytes` bytes using the vector unit.
 *
 * Brackets the assembly routine with kernel_vector_begin()/end() so the
 * vector context is properly set up/torn down for in-kernel vector use.
 */
static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1,
			 const unsigned long *__restrict p2)
{
	kernel_vector_begin();
	xor_regs_2_(bytes, p1, p2);
	kernel_vector_end();
}
/*
 * xor_vector_3() - p1[] ^= p2[] ^ p3[] for `bytes` bytes.
 *
 * Same vector-context bracketing as xor_vector_2(), with one more
 * source buffer.
 */
static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1,
			 const unsigned long *__restrict p2,
			 const unsigned long *__restrict p3)
{
	kernel_vector_begin();
	xor_regs_3_(bytes, p1, p2, p3);
	kernel_vector_end();
}
/*
 * xor_vector_4() - p1[] ^= p2[] ^ p3[] ^ p4[] for `bytes` bytes.
 *
 * Same vector-context bracketing as xor_vector_2(), with two more
 * source buffers.
 */
static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1,
			 const unsigned long *__restrict p2,
			 const unsigned long *__restrict p3,
			 const unsigned long *__restrict p4)
{
	kernel_vector_begin();
	xor_regs_4_(bytes, p1, p2, p3, p4);
	kernel_vector_end();
}
/*
 * xor_vector_5() - p1[] ^= p2[] ^ p3[] ^ p4[] ^ p5[] for `bytes` bytes.
 *
 * Same vector-context bracketing as xor_vector_2(), with three more
 * source buffers.
 */
static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1,
			 const unsigned long *__restrict p2,
			 const unsigned long *__restrict p3,
			 const unsigned long *__restrict p4,
			 const unsigned long *__restrict p5)
{
	kernel_vector_begin();
	xor_regs_5_(bytes, p1, p2, p3, p4, p5);
	kernel_vector_end();
}
/*
 * Template describing the vector ("rvv") XOR implementation, in the
 * format consumed by the generic XOR template-selection machinery.
 */
static struct xor_block_template xor_block_rvv = {
	.name = "rvv",
	.do_2 = xor_vector_2,
	.do_3 = xor_vector_3,
	.do_4 = xor_vector_4,
	.do_5 = xor_vector_5
};

#undef XOR_TRY_TEMPLATES
/*
 * Candidate implementations to benchmark at boot: the generic
 * register-based variants always, plus the rvv variant only when the
 * CPU actually supports the vector extension (has_vector()).
 */
#define XOR_TRY_TEMPLATES           \
	do {        \
		xor_speed(&xor_block_8regs);    \
		xor_speed(&xor_block_32regs);    \
		if (has_vector()) { \
			xor_speed(&xor_block_rvv);\
		} \
	} while (0)
#endif
......@@ -11,3 +11,4 @@ lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
# Vector-accelerated XOR routines; built only when the kernel is
# configured with RISC-V Vector extension support.
lib-$(CONFIG_RISCV_ISA_V)	+= xor.o
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2021 SiFive
*/
#include <linux/linkage.h>
#include <linux/export.h>
#include <asm/asm.h>
/*
 * void xor_regs_2_(unsigned long bytes, unsigned long *p1,
 *		    const unsigned long *p2)
 *
 * p1[] ^= p2[] for a0 = `bytes` bytes. Each pass handles one strip of
 * vl bytes (8-bit elements, LMUL=8) and loops by branching back to the
 * function entry until a0 reaches zero. Scalar pointer updates are
 * interleaved with the vector ops, presumably to overlap their latency.
 */
SYM_FUNC_START(xor_regs_2_)
	vsetvli	a3, a0, e8, m8, ta, ma	/* a3 = vl = bytes this pass */
	vle8.v	v0, (a1)		/* load p1 strip */
	vle8.v	v8, (a2)		/* load p2 strip */
	sub	a0, a0, a3		/* bytes remaining */
	vxor.vv	v16, v0, v8		/* p1 ^ p2 */
	add	a2, a2, a3		/* advance p2 */
	vse8.v	v16, (a1)		/* store result into p1 */
	add	a1, a1, a3		/* advance p1 */
	bnez	a0, xor_regs_2_
	ret
SYM_FUNC_END(xor_regs_2_)
EXPORT_SYMBOL(xor_regs_2_)
/*
 * void xor_regs_3_(unsigned long bytes, unsigned long *p1,
 *		    const unsigned long *p2, const unsigned long *p3)
 *
 * p1[] ^= p2[] ^ p3[]. Same strip-mined structure as xor_regs_2_,
 * with a4 holding the per-pass vector length.
 */
SYM_FUNC_START(xor_regs_3_)
	vsetvli	a4, a0, e8, m8, ta, ma	/* a4 = vl = bytes this pass */
	vle8.v	v0, (a1)		/* load p1 strip */
	vle8.v	v8, (a2)		/* load p2 strip */
	sub	a0, a0, a4		/* bytes remaining */
	vxor.vv	v0, v0, v8		/* p1 ^ p2 */
	vle8.v	v16, (a3)		/* load p3 strip */
	add	a2, a2, a4		/* advance p2 */
	vxor.vv	v16, v0, v16		/* ... ^ p3 */
	add	a3, a3, a4		/* advance p3 */
	vse8.v	v16, (a1)		/* store result into p1 */
	add	a1, a1, a4		/* advance p1 */
	bnez	a0, xor_regs_3_
	ret
SYM_FUNC_END(xor_regs_3_)
EXPORT_SYMBOL(xor_regs_3_)
/*
 * void xor_regs_4_(unsigned long bytes, unsigned long *p1,
 *		    const unsigned long *p2, const unsigned long *p3,
 *		    const unsigned long *p4)
 *
 * p1[] ^= p2[] ^ p3[] ^ p4[]. Same strip-mined structure as
 * xor_regs_2_, with a5 holding the per-pass vector length.
 */
SYM_FUNC_START(xor_regs_4_)
	vsetvli	a5, a0, e8, m8, ta, ma	/* a5 = vl = bytes this pass */
	vle8.v	v0, (a1)		/* load p1 strip */
	vle8.v	v8, (a2)		/* load p2 strip */
	sub	a0, a0, a5		/* bytes remaining */
	vxor.vv	v0, v0, v8		/* p1 ^ p2 */
	vle8.v	v16, (a3)		/* load p3 strip */
	add	a2, a2, a5		/* advance p2 */
	vxor.vv	v0, v0, v16		/* ... ^ p3 */
	vle8.v	v24, (a4)		/* load p4 strip */
	add	a3, a3, a5		/* advance p3 */
	vxor.vv	v16, v0, v24		/* ... ^ p4 */
	add	a4, a4, a5		/* advance p4 */
	vse8.v	v16, (a1)		/* store result into p1 */
	add	a1, a1, a5		/* advance p1 */
	bnez	a0, xor_regs_4_
	ret
SYM_FUNC_END(xor_regs_4_)
EXPORT_SYMBOL(xor_regs_4_)
/*
 * void xor_regs_5_(unsigned long bytes, unsigned long *p1,
 *		    const unsigned long *p2, const unsigned long *p3,
 *		    const unsigned long *p4, const unsigned long *p5)
 *
 * p1[] ^= p2[] ^ p3[] ^ p4[] ^ p5[]. Same strip-mined structure as
 * xor_regs_2_, with a6 holding the per-pass vector length. v8 is
 * reused for the p5 load once the p2 data has been folded in.
 */
SYM_FUNC_START(xor_regs_5_)
	vsetvli	a6, a0, e8, m8, ta, ma	/* a6 = vl = bytes this pass */
	vle8.v	v0, (a1)		/* load p1 strip */
	vle8.v	v8, (a2)		/* load p2 strip */
	sub	a0, a0, a6		/* bytes remaining */
	vxor.vv	v0, v0, v8		/* p1 ^ p2 */
	vle8.v	v16, (a3)		/* load p3 strip */
	add	a2, a2, a6		/* advance p2 */
	vxor.vv	v0, v0, v16		/* ... ^ p3 */
	vle8.v	v24, (a4)		/* load p4 strip */
	add	a3, a3, a6		/* advance p3 */
	vxor.vv	v0, v0, v24		/* ... ^ p4 */
	vle8.v	v8, (a5)		/* load p5 strip (v8 reused) */
	add	a4, a4, a6		/* advance p4 */
	vxor.vv	v16, v0, v8		/* ... ^ p5 */
	add	a5, a5, a6		/* advance p5 */
	vse8.v	v16, (a1)		/* store result into p1 */
	add	a1, a1, a6		/* advance p1 */
	bnez	a0, xor_regs_5_
	ret
SYM_FUNC_END(xor_regs_5_)
EXPORT_SYMBOL(xor_regs_5_)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment