Commit afc76b8b authored by Guo Ren, committed by Palmer Dabbelt

riscv: Using PATCHABLE_FUNCTION_ENTRY instead of MCOUNT

This patch changes the current detour mechanism of dynamic ftrace
which has been discussed during LPC 2020 RISCV-MC [1].

Before the patch, we used mcount for detour:
<funca>:
	addi sp,sp,-16
	sd   ra,8(sp)
	sd   s0,0(sp)
	addi s0,sp,16
	mv   a5,ra
	mv   a0,a5
	auipc ra,0x0 -> nop
	jalr  -296(ra) <_mcount@plt> ->nop
	...

After the patch, we use the nop call-site area for the detour:
<funca>:
	nop -> REG_S ra, -SZREG(sp)
	nop -> auipc ra, 0x?
	nop -> jalr ?(ra)
	nop -> REG_L ra, -SZREG(sp)
	...

The mcount mechanism is intertwined with the gcc function prologue,
which is not very clear. The patchable function entry instead puts
16 bytes of nops at the front of the function, before the prologue,
which can be filled in by a separate detour mechanism.

[1] https://www.linuxplumbersconf.org/event/7/contributions/807/

Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
parent 5ad84adf
...@@ -12,6 +12,8 @@ OBJCOPYFLAGS := -O binary ...@@ -12,6 +12,8 @@ OBJCOPYFLAGS := -O binary
LDFLAGS_vmlinux := LDFLAGS_vmlinux :=
ifeq ($(CONFIG_DYNAMIC_FTRACE),y) ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
LDFLAGS_vmlinux := --no-relax LDFLAGS_vmlinux := --no-relax
KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
CC_FLAGS_FTRACE := -fpatchable-function-entry=8
endif endif
ifeq ($(CONFIG_64BIT)$(CONFIG_CMODEL_MEDLOW),yy) ifeq ($(CONFIG_64BIT)$(CONFIG_CMODEL_MEDLOW),yy)
......
...@@ -72,29 +72,56 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, ...@@ -72,29 +72,56 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
return 0; return 0;
} }
/*
* Put 4 instructions (16 bytes) at the front of the function within
* patchable function entry nops' area.
*
* 0: REG_S ra, -SZREG(sp)
* 1: auipc ra, 0x?
* 2: jalr -?(ra)
* 3: REG_L ra, -SZREG(sp)
*
* So the opcodes are:
* 0: 0xfe113c23 (sd)/0xfe112e23 (sw)
* 1: 0x???????? -> auipc
* 2: 0x???????? -> jalr
* 3: 0xff813083 (ld)/0xffc12083 (lw)
*/
#if __riscv_xlen == 64
#define INSN0 0xfe113c23
#define INSN3 0xff813083
#elif __riscv_xlen == 32
#define INSN0 0xfe112e23
#define INSN3 0xffc12083
#endif
#define FUNC_ENTRY_SIZE 16
#define FUNC_ENTRY_JMP 4
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{ {
int ret = ftrace_check_current_call(rec->ip, NULL); unsigned int call[4] = {INSN0, 0, 0, INSN3};
unsigned long target = addr;
unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
if (ret) call[1] = to_auipc_insn((unsigned int)(target - caller));
return ret; call[2] = to_jalr_insn((unsigned int)(target - caller));
return __ftrace_modify_call(rec->ip, addr, true); if (patch_text_nosync((void *)rec->ip, call, FUNC_ENTRY_SIZE))
return -EPERM;
return 0;
} }
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr) unsigned long addr)
{ {
unsigned int call[2]; unsigned int nops[4] = {NOP4, NOP4, NOP4, NOP4};
int ret;
make_call(rec->ip, addr, call); if (patch_text_nosync((void *)rec->ip, nops, FUNC_ENTRY_SIZE))
ret = ftrace_check_current_call(rec->ip, call); return -EPERM;
if (ret)
return ret;
return __ftrace_modify_call(rec->ip, addr, false); return 0;
} }
...@@ -139,15 +166,16 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, ...@@ -139,15 +166,16 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr) unsigned long addr)
{ {
unsigned int call[2]; unsigned int call[2];
unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
int ret; int ret;
make_call(rec->ip, old_addr, call); make_call(caller, old_addr, call);
ret = ftrace_check_current_call(rec->ip, call); ret = ftrace_check_current_call(caller, call);
if (ret) if (ret)
return ret; return ret;
return __ftrace_modify_call(rec->ip, addr, true); return __ftrace_modify_call(caller, addr, true);
} }
#endif #endif
...@@ -176,53 +204,30 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, ...@@ -176,53 +204,30 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
#ifdef CONFIG_DYNAMIC_FTRACE #ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void); extern void ftrace_graph_call(void);
extern void ftrace_graph_regs_call(void);
int ftrace_enable_ftrace_graph_caller(void) int ftrace_enable_ftrace_graph_caller(void)
{ {
unsigned int call[2];
static int init_graph = 1;
int ret; int ret;
make_call(&ftrace_graph_call, &ftrace_stub, call); ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
(unsigned long)&prepare_ftrace_return, true);
/*
* When enabling graph tracer for the first time, ftrace_graph_call
* should contains a call to ftrace_stub. Once it has been disabled,
* the 8-bytes at the position becomes NOPs.
*/
if (init_graph) {
ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
call);
init_graph = 0;
} else {
ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
NULL);
}
if (ret) if (ret)
return ret; return ret;
return __ftrace_modify_call((unsigned long)&ftrace_graph_call, return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
(unsigned long)&prepare_ftrace_return, true); (unsigned long)&prepare_ftrace_return, true);
} }
int ftrace_disable_ftrace_graph_caller(void) int ftrace_disable_ftrace_graph_caller(void)
{ {
unsigned int call[2];
int ret; int ret;
make_call(&ftrace_graph_call, &prepare_ftrace_return, call); ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
(unsigned long)&prepare_ftrace_return, false);
/*
* This is to make sure that ftrace_enable_ftrace_graph_caller
* did the right thing.
*/
ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
call);
if (ret) if (ret)
return ret; return ret;
return __ftrace_modify_call((unsigned long)&ftrace_graph_call, return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
(unsigned long)&prepare_ftrace_return, false); (unsigned long)&prepare_ftrace_return, false);
} }
#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_DYNAMIC_FTRACE */
......
...@@ -13,224 +13,186 @@ ...@@ -13,224 +13,186 @@
.text .text
.macro SAVE_ABI_STATE #define FENTRY_RA_OFFSET 12
#ifdef CONFIG_FUNCTION_GRAPH_TRACER #define ABI_SIZE_ON_STACK 72
addi sp, sp, -48 #define ABI_A0 0
sd s0, 32(sp) #define ABI_A1 8
sd ra, 40(sp) #define ABI_A2 16
addi s0, sp, 48 #define ABI_A3 24
sd t0, 24(sp) #define ABI_A4 32
sd t1, 16(sp) #define ABI_A5 40
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST #define ABI_A6 48
sd t2, 8(sp) #define ABI_A7 56
#endif #define ABI_RA 64
#else
addi sp, sp, -16 .macro SAVE_ABI
sd s0, 0(sp) addi sp, sp, -SZREG
sd ra, 8(sp) addi sp, sp, -ABI_SIZE_ON_STACK
addi s0, sp, 16
#endif REG_S a0, ABI_A0(sp)
REG_S a1, ABI_A1(sp)
REG_S a2, ABI_A2(sp)
REG_S a3, ABI_A3(sp)
REG_S a4, ABI_A4(sp)
REG_S a5, ABI_A5(sp)
REG_S a6, ABI_A6(sp)
REG_S a7, ABI_A7(sp)
REG_S ra, ABI_RA(sp)
.endm .endm
.macro RESTORE_ABI_STATE .macro RESTORE_ABI
#ifdef CONFIG_FUNCTION_GRAPH_TRACER REG_L a0, ABI_A0(sp)
ld s0, 32(sp) REG_L a1, ABI_A1(sp)
ld ra, 40(sp) REG_L a2, ABI_A2(sp)
addi sp, sp, 48 REG_L a3, ABI_A3(sp)
#else REG_L a4, ABI_A4(sp)
ld ra, 8(sp) REG_L a5, ABI_A5(sp)
ld s0, 0(sp) REG_L a6, ABI_A6(sp)
addi sp, sp, 16 REG_L a7, ABI_A7(sp)
#endif REG_L ra, ABI_RA(sp)
addi sp, sp, ABI_SIZE_ON_STACK
addi sp, sp, SZREG
.endm .endm
.macro RESTORE_GRAPH_ARGS #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
ld a0, 24(sp) .macro SAVE_ALL
ld a1, 16(sp) addi sp, sp, -SZREG
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST addi sp, sp, -PT_SIZE_ON_STACK
ld a2, 8(sp)
#endif REG_S x1, PT_EPC(sp)
addi sp, sp, PT_SIZE_ON_STACK
REG_L x1, (sp)
addi sp, sp, -PT_SIZE_ON_STACK
REG_S x1, PT_RA(sp)
REG_L x1, PT_EPC(sp)
REG_S x2, PT_SP(sp)
REG_S x3, PT_GP(sp)
REG_S x4, PT_TP(sp)
REG_S x5, PT_T0(sp)
REG_S x6, PT_T1(sp)
REG_S x7, PT_T2(sp)
REG_S x8, PT_S0(sp)
REG_S x9, PT_S1(sp)
REG_S x10, PT_A0(sp)
REG_S x11, PT_A1(sp)
REG_S x12, PT_A2(sp)
REG_S x13, PT_A3(sp)
REG_S x14, PT_A4(sp)
REG_S x15, PT_A5(sp)
REG_S x16, PT_A6(sp)
REG_S x17, PT_A7(sp)
REG_S x18, PT_S2(sp)
REG_S x19, PT_S3(sp)
REG_S x20, PT_S4(sp)
REG_S x21, PT_S5(sp)
REG_S x22, PT_S6(sp)
REG_S x23, PT_S7(sp)
REG_S x24, PT_S8(sp)
REG_S x25, PT_S9(sp)
REG_S x26, PT_S10(sp)
REG_S x27, PT_S11(sp)
REG_S x28, PT_T3(sp)
REG_S x29, PT_T4(sp)
REG_S x30, PT_T5(sp)
REG_S x31, PT_T6(sp)
.endm .endm
ENTRY(ftrace_graph_caller) .macro RESTORE_ALL
addi sp, sp, -16 REG_L x1, PT_RA(sp)
sd s0, 0(sp) addi sp, sp, PT_SIZE_ON_STACK
sd ra, 8(sp) REG_S x1, (sp)
addi s0, sp, 16 addi sp, sp, -PT_SIZE_ON_STACK
ftrace_graph_call: REG_L x1, PT_EPC(sp)
.global ftrace_graph_call REG_L x2, PT_SP(sp)
/* REG_L x3, PT_GP(sp)
* Calling ftrace_enable/disable_ftrace_graph_caller would overwrite the REG_L x4, PT_TP(sp)
* call below. Check ftrace_modify_all_code for details. REG_L x5, PT_T0(sp)
*/ REG_L x6, PT_T1(sp)
call ftrace_stub REG_L x7, PT_T2(sp)
ld ra, 8(sp) REG_L x8, PT_S0(sp)
ld s0, 0(sp) REG_L x9, PT_S1(sp)
addi sp, sp, 16 REG_L x10, PT_A0(sp)
ret REG_L x11, PT_A1(sp)
ENDPROC(ftrace_graph_caller) REG_L x12, PT_A2(sp)
REG_L x13, PT_A3(sp)
REG_L x14, PT_A4(sp)
REG_L x15, PT_A5(sp)
REG_L x16, PT_A6(sp)
REG_L x17, PT_A7(sp)
REG_L x18, PT_S2(sp)
REG_L x19, PT_S3(sp)
REG_L x20, PT_S4(sp)
REG_L x21, PT_S5(sp)
REG_L x22, PT_S6(sp)
REG_L x23, PT_S7(sp)
REG_L x24, PT_S8(sp)
REG_L x25, PT_S9(sp)
REG_L x26, PT_S10(sp)
REG_L x27, PT_S11(sp)
REG_L x28, PT_T3(sp)
REG_L x29, PT_T4(sp)
REG_L x30, PT_T5(sp)
REG_L x31, PT_T6(sp)
addi sp, sp, PT_SIZE_ON_STACK
addi sp, sp, SZREG
.endm
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
ENTRY(ftrace_caller) ENTRY(ftrace_caller)
/* SAVE_ABI
* a0: the address in the caller when calling ftrace_caller
* a1: the caller's return address
* a2: the address of global variable function_trace_op
*/
ld a1, -8(s0)
addi a0, ra, -MCOUNT_INSN_SIZE
la t5, function_trace_op
ld a2, 0(t5)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER addi a0, ra, -FENTRY_RA_OFFSET
/* la a1, function_trace_op
* the graph tracer (specifically, prepare_ftrace_return) needs these REG_L a2, 0(a1)
* arguments but for now the function tracer occupies the regs, so we REG_L a1, ABI_SIZE_ON_STACK(sp)
* save them in temporary regs to recover later. mv a3, sp
*/
addi t0, s0, -8
mv t1, a0
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
ld t2, -16(s0)
#endif
#endif
SAVE_ABI_STATE
ftrace_call: ftrace_call:
.global ftrace_call .global ftrace_call
/*
* For the dynamic ftrace to work, here we should reserve at least
* 8 bytes for a functional auipc-jalr pair. The following call
* serves this purpose.
*
* Calling ftrace_update_ftrace_func would overwrite the nops below.
* Check ftrace_modify_all_code for details.
*/
call ftrace_stub call ftrace_stub
#ifdef CONFIG_FUNCTION_GRAPH_TRACER #ifdef CONFIG_FUNCTION_GRAPH_TRACER
RESTORE_GRAPH_ARGS addi a0, sp, ABI_SIZE_ON_STACK
call ftrace_graph_caller REG_L a1, ABI_RA(sp)
addi a1, a1, -FENTRY_RA_OFFSET
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
mv a2, s0
#endif #endif
ftrace_graph_call:
RESTORE_ABI_STATE .global ftrace_graph_call
call ftrace_stub
#endif
RESTORE_ABI
ret ret
ENDPROC(ftrace_caller) ENDPROC(ftrace_caller)
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
.macro SAVE_ALL
addi sp, sp, -(PT_SIZE_ON_STACK+16)
sd s0, (PT_SIZE_ON_STACK)(sp)
sd ra, (PT_SIZE_ON_STACK+8)(sp)
addi s0, sp, (PT_SIZE_ON_STACK+16)
sd x1, PT_RA(sp)
sd x2, PT_SP(sp)
sd x3, PT_GP(sp)
sd x4, PT_TP(sp)
sd x5, PT_T0(sp)
sd x6, PT_T1(sp)
sd x7, PT_T2(sp)
sd x8, PT_S0(sp)
sd x9, PT_S1(sp)
sd x10, PT_A0(sp)
sd x11, PT_A1(sp)
sd x12, PT_A2(sp)
sd x13, PT_A3(sp)
sd x14, PT_A4(sp)
sd x15, PT_A5(sp)
sd x16, PT_A6(sp)
sd x17, PT_A7(sp)
sd x18, PT_S2(sp)
sd x19, PT_S3(sp)
sd x20, PT_S4(sp)
sd x21, PT_S5(sp)
sd x22, PT_S6(sp)
sd x23, PT_S7(sp)
sd x24, PT_S8(sp)
sd x25, PT_S9(sp)
sd x26, PT_S10(sp)
sd x27, PT_S11(sp)
sd x28, PT_T3(sp)
sd x29, PT_T4(sp)
sd x30, PT_T5(sp)
sd x31, PT_T6(sp)
.endm
.macro RESTORE_ALL
ld x1, PT_RA(sp)
ld x2, PT_SP(sp)
ld x3, PT_GP(sp)
ld x4, PT_TP(sp)
ld x5, PT_T0(sp)
ld x6, PT_T1(sp)
ld x7, PT_T2(sp)
ld x8, PT_S0(sp)
ld x9, PT_S1(sp)
ld x10, PT_A0(sp)
ld x11, PT_A1(sp)
ld x12, PT_A2(sp)
ld x13, PT_A3(sp)
ld x14, PT_A4(sp)
ld x15, PT_A5(sp)
ld x16, PT_A6(sp)
ld x17, PT_A7(sp)
ld x18, PT_S2(sp)
ld x19, PT_S3(sp)
ld x20, PT_S4(sp)
ld x21, PT_S5(sp)
ld x22, PT_S6(sp)
ld x23, PT_S7(sp)
ld x24, PT_S8(sp)
ld x25, PT_S9(sp)
ld x26, PT_S10(sp)
ld x27, PT_S11(sp)
ld x28, PT_T3(sp)
ld x29, PT_T4(sp)
ld x30, PT_T5(sp)
ld x31, PT_T6(sp)
ld s0, (PT_SIZE_ON_STACK)(sp)
ld ra, (PT_SIZE_ON_STACK+8)(sp)
addi sp, sp, (PT_SIZE_ON_STACK+16)
.endm
.macro RESTORE_GRAPH_REG_ARGS
ld a0, PT_T0(sp)
ld a1, PT_T1(sp)
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
ld a2, PT_T2(sp)
#endif
.endm
/*
* Most of the contents are the same as ftrace_caller.
*/
ENTRY(ftrace_regs_caller) ENTRY(ftrace_regs_caller)
/*
* a3: the address of all registers in the stack
*/
ld a1, -8(s0)
addi a0, ra, -MCOUNT_INSN_SIZE
la t5, function_trace_op
ld a2, 0(t5)
addi a3, sp, -(PT_SIZE_ON_STACK+16)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
addi t0, s0, -8
mv t1, a0
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
ld t2, -16(s0)
#endif
#endif
SAVE_ALL SAVE_ALL
addi a0, ra, -FENTRY_RA_OFFSET
la a1, function_trace_op
REG_L a2, 0(a1)
REG_L a1, PT_SIZE_ON_STACK(sp)
mv a3, sp
ftrace_regs_call: ftrace_regs_call:
.global ftrace_regs_call .global ftrace_regs_call
call ftrace_stub call ftrace_stub
#ifdef CONFIG_FUNCTION_GRAPH_TRACER #ifdef CONFIG_FUNCTION_GRAPH_TRACER
RESTORE_GRAPH_REG_ARGS addi a0, sp, PT_RA
call ftrace_graph_caller REG_L a1, PT_EPC(sp)
addi a1, a1, -FENTRY_RA_OFFSET
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
mv a2, s0
#endif
ftrace_graph_regs_call:
.global ftrace_graph_regs_call
call ftrace_stub
#endif #endif
RESTORE_ALL RESTORE_ALL
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment