Commit 4733f09d authored by Qing Zhang, committed by Huacai Chen

LoongArch/ftrace: Add dynamic function tracer support

With -fpatchable-function-entry=2, the compiler inserts 2 NOPs before the
regular function prologue, and the T series registers are available and
safe to clobber there because of LoongArch's psABI.

At runtime, we replace the second nop with a bl to enable the ftrace call,
and replace the bl with a nop to disable it. Because the bl instruction
clobbers RA, the original RA value is saved in t0 first.

Details are:

| Compiled   |       Disabled         |        Enabled         |
+------------+------------------------+------------------------+
| nop        | move     t0, ra        | move    t0, ra         |
| nop        | nop                    | bl      ftrace_caller  |
| func_body  | func_body              | func_body              |

The RA value will be recovered by ftrace_regs_entry, and restored into
RA before returning to the regular function prologue. When a function is
not being traced, the "move t0, ra" is harmless.
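
As an illustration only (a sketch using the insn helpers this patch adds,
not code from the patch; `site' stands for a call site's rec->ip), the
three encodings involved are:

	static void example_site_encodings(unsigned long site)
	{
		u32 nop  = larch_insn_gen_nop();					/* compiled state */
		u32 mov  = larch_insn_gen_move(LOONGARCH_GPR_T0, LOONGARCH_GPR_RA);	/* 1st slot, once initialized */
		u32 call = larch_insn_gen_bl(site + LOONGARCH_INSN_SIZE,
					     (unsigned long)ftrace_caller);		/* 2nd slot, while enabled */
	}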

1) ftrace_make_call, ftrace_make_nop (in kernel/ftrace_dyn.c)
   These two functions turn each recorded call site of a filtered
   function into a call to ftrace_caller, or back into nops.

2) ftrace_update_ftrace_func (in kernel/ftrace_dyn.c)
   Turns the nops at ftrace_call into a call to a generic entry for
   function tracers.

3) ftrace_caller (in kernel/mcount_dyn.S)
   The entry that each patched call site branches to once its function is
   filtered for tracing.

Co-developed-by: Jinyang He <hejinyang@loongson.cn>
Signed-off-by: Jinyang He <hejinyang@loongson.cn>
Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
parent a0a458fb
arch/loongarch/Kconfig
@@ -88,6 +88,7 @@ config LOONGARCH
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_DMA_CONTIGUOUS
+	select HAVE_DYNAMIC_FTRACE
 	select HAVE_EBPF_JIT
 	select HAVE_EXIT_THREAD
 	select HAVE_FAST_GUP
......
arch/loongarch/Makefile
@@ -25,6 +25,11 @@ endif
 32bit-emul		= elf32loongarch
 64bit-emul		= elf64loongarch
 
+ifdef CONFIG_DYNAMIC_FTRACE
+KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+endif
+
 ifdef CONFIG_64BIT
 tool-archpref		= $(64bit-tool-archpref)
 UTS_MACHINE		:= loongarch64
......
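
For reference, a sketch (not part of the patch) of what
-fpatchable-function-entry=2 does to an ordinary function; the emitted
code is paraphrased in the comment:

	void example(void) { }

	/*
	 * Compiles to approximately:
	 *
	 * example:
	 *	nop			# 1st slot: later patched to "move t0, ra"
	 *	nop			# 2nd slot: later patched to "bl ftrace_caller"
	 *	...			# regular prologue and body
	 *
	 * ftrace's rec->ip records the address of the 1st slot.
	 */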
arch/loongarch/include/asm/ftrace.h
@@ -11,9 +11,30 @@
 #define MCOUNT_INSN_SIZE 4		/* sizeof mcount call */
 
 #ifndef __ASSEMBLY__
+#ifndef CONFIG_DYNAMIC_FTRACE
+
 #define mcount _mcount
 extern void _mcount(void);
 extern void prepare_ftrace_return(unsigned long self_addr, unsigned long callsite_sp, unsigned long old);
+
+#else
+
+struct dyn_ftrace;
+struct dyn_arch_ftrace { };
+
+#define ftrace_init_nop ftrace_init_nop
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	return addr;
+}
+
+void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent);
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_FUNCTION_TRACER */
......
arch/loongarch/include/asm/inst.h
@@ -349,6 +349,17 @@ static inline bool is_stack_alloc_ins(union loongarch_instruction *ip)
 	       is_imm12_negative(ip->reg2i12_format.immediate);
 }
 
+int larch_insn_read(void *addr, u32 *insnp);
+int larch_insn_write(void *addr, u32 insn);
+int larch_insn_patch_text(void *addr, u32 insn);
+
+u32 larch_insn_gen_nop(void);
+u32 larch_insn_gen_b(unsigned long pc, unsigned long dest);
+u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest);
+
+u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongarch_gpr rk);
+u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj);
+
 u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm);
 u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm);
 u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned long pc, unsigned long dest);
......
arch/loongarch/include/asm/unwind.h
@@ -20,7 +20,7 @@ struct unwind_state {
 	char type; /* UNWINDER_XXX */
 	struct stack_info stack_info;
 	struct task_struct *task;
-	bool first, error;
+	bool first, error, is_ftrace;
 	unsigned long sp, pc, ra;
 };
......
arch/loongarch/kernel/Makefile
@@ -16,11 +16,16 @@ obj-$(CONFIG_EFI) 		+= efi.o
 obj-$(CONFIG_CPU_HAS_FPU)	+= fpu.o
 
 ifdef CONFIG_FUNCTION_TRACER
-obj-y += mcount.o ftrace.o
-CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE)
+  ifndef CONFIG_DYNAMIC_FTRACE
+    obj-y += mcount.o ftrace.o
+    CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+  else
+    obj-y += mcount_dyn.o ftrace_dyn.o
+    CFLAGS_REMOVE_ftrace_dyn.o = $(CC_FLAGS_FTRACE)
+  endif
+  CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE)
+  CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE)
+  CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE)
 endif
 
 obj-$(CONFIG_MODULES) += module.o module-sections.o
......
arch/loongarch/kernel/ftrace_dyn.c (new file):

// SPDX-License-Identifier: GPL-2.0
/*
 * Based on arch/arm64/kernel/ftrace.c
 *
 * Copyright (C) 2022 Loongson Technology Corporation Limited
 */

#include <linux/ftrace.h>
#include <linux/uaccess.h>

#include <asm/inst.h>

static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate)
{
	u32 replaced;

	if (validate) {
		if (larch_insn_read((void *)pc, &replaced))
			return -EFAULT;

		if (replaced != old)
			return -EINVAL;
	}

	if (larch_insn_patch_text((void *)pc, new))
		return -EPERM;

	return 0;
}

int ftrace_update_ftrace_func(ftrace_func_t func)
{
	u32 new;
	unsigned long pc;

	pc = (unsigned long)&ftrace_call;
	new = larch_insn_gen_bl(pc, (unsigned long)func);

	return ftrace_modify_code(pc, 0, new, false);
}

/*
 * The compiler has inserted 2 NOPs before the regular function prologue.
 * T series registers are available and safe because of LoongArch's psABI.
 *
 * At runtime, we can replace nop with bl to enable ftrace call and replace bl
 * with nop to disable ftrace call. The bl requires us to save the original RA
 * value, so it saves RA at t0 here.
 *
 * Details are:
 *
 * | Compiled   |       Disabled         |        Enabled         |
 * +------------+------------------------+------------------------+
 * | nop        | move     t0, ra        | move    t0, ra         |
 * | nop        | nop                    | bl      ftrace_caller  |
 * | func_body  | func_body              | func_body              |
 *
 * The RA value will be recovered by ftrace_regs_entry, and restored into RA
 * before returning to the regular function prologue. When a function is not
 * being traced, the "move t0, ra" is not harmful.
 */
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
{
	u32 old, new;
	unsigned long pc;

	pc = rec->ip;
	old = larch_insn_gen_nop();
	new = larch_insn_gen_move(LOONGARCH_GPR_T0, LOONGARCH_GPR_RA);

	return ftrace_modify_code(pc, old, new, true);
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	u32 old, new;
	unsigned long pc;

	pc = rec->ip + LOONGARCH_INSN_SIZE;
	old = larch_insn_gen_nop();
	new = larch_insn_gen_bl(pc, addr);

	return ftrace_modify_code(pc, old, new, true);
}

int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
{
	u32 old, new;
	unsigned long pc;

	pc = rec->ip + LOONGARCH_INSN_SIZE;
	new = larch_insn_gen_nop();
	old = larch_insn_gen_bl(pc, addr);

	return ftrace_modify_code(pc, old, new, true);
}

void arch_ftrace_update_code(int command)
{
	command |= FTRACE_MAY_SLEEP;
	ftrace_modify_all_code(command);
}

int __init ftrace_dyn_arch_init(void)
{
	return 0;
}
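
To make the flow concrete, a sketch (not part of the patch) of the order
in which the ftrace core drives these hooks for one call site; FTRACE_ADDR
resolves to ftrace_caller:

	ftrace_init_nop(mod, rec);		/* boot: 1st nop -> "move t0, ra" */
	ftrace_make_call(rec, FTRACE_ADDR);	/* enable: 2nd nop -> "bl ftrace_caller" */
	ftrace_make_nop(mod, rec, FTRACE_ADDR);	/* disable: "bl" -> nop (the move stays) */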
arch/loongarch/kernel/inst.c
@@ -2,8 +2,100 @@
 /*
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
+#include <linux/sizes.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
 #include <asm/inst.h>
 
+static DEFINE_RAW_SPINLOCK(patch_lock);
+
+int larch_insn_read(void *addr, u32 *insnp)
+{
+	int ret;
+	u32 val;
+
+	ret = copy_from_kernel_nofault(&val, addr, LOONGARCH_INSN_SIZE);
+	if (!ret)
+		*insnp = val;
+
+	return ret;
+}
+
+int larch_insn_write(void *addr, u32 insn)
+{
+	int ret;
+	unsigned long flags = 0;
+
+	raw_spin_lock_irqsave(&patch_lock, flags);
+	ret = copy_to_kernel_nofault(addr, &insn, LOONGARCH_INSN_SIZE);
+	raw_spin_unlock_irqrestore(&patch_lock, flags);
+
+	return ret;
+}
+
+int larch_insn_patch_text(void *addr, u32 insn)
+{
+	int ret;
+	u32 *tp = addr;
+
+	if ((unsigned long)tp & 3)
+		return -EINVAL;
+
+	ret = larch_insn_write(tp, insn);
+	if (!ret)
+		flush_icache_range((unsigned long)tp,
+				   (unsigned long)tp + LOONGARCH_INSN_SIZE);
+
+	return ret;
+}
+
+u32 larch_insn_gen_nop(void)
+{
+	return INSN_NOP;
+}
+
+u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest)
+{
+	long offset = dest - pc;
+	unsigned int immediate_l, immediate_h;
+	union loongarch_instruction insn;
+
+	if ((offset & 3) || offset < -SZ_128M || offset >= SZ_128M) {
+		pr_warn("The generated bl instruction is out of range.\n");
+		return INSN_BREAK;
+	}
+
+	offset >>= 2;
+
+	immediate_l = offset & 0xffff;
+	offset >>= 16;
+	immediate_h = offset & 0x3ff;
+
+	insn.reg0i26_format.opcode = bl_op;
+	insn.reg0i26_format.immediate_l = immediate_l;
+	insn.reg0i26_format.immediate_h = immediate_h;
+
+	return insn.word;
+}
+
+u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongarch_gpr rk)
+{
+	union loongarch_instruction insn;
+
+	insn.reg3_format.opcode = or_op;
+	insn.reg3_format.rd = rd;
+	insn.reg3_format.rj = rj;
+	insn.reg3_format.rk = rk;
+
+	return insn.word;
+}
+
+u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj)
+{
+	return larch_insn_gen_or(rd, rj, 0);
+}
+
 u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm)
 {
 	union loongarch_instruction insn;
......
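
A quick sanity sketch for larch_insn_gen_bl() above (hypothetical
addresses): a branch 8 bytes forward must encode offset >> 2 == 2, and an
out-of-range target must fall back to INSN_BREAK:

	union loongarch_instruction insn;

	insn.word = larch_insn_gen_bl(0x9000000000200000, 0x9000000000200008);
	/* offset 8: immediate_l == 2, immediate_h == 0 */

	insn.word = larch_insn_gen_bl(0x9000000000200000, 0x9000000080200000);
	/* 2 GiB away, beyond +/-128M: returns INSN_BREAK and warns */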
arch/loongarch/kernel/mcount_dyn.S (new file):

/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2022 Loongson Technology Corporation Limited
 */

#include <asm/export.h>
#include <asm/ftrace.h>
#include <asm/regdef.h>
#include <asm/stackframe.h>

	.text
/*
 * Due to -fpatchable-function-entry=2, the compiler inserted 2 NOPs before the
 * regular C function prologue. When PC arrives here, the last 2 instructions
 * executed were:
 *	move	t0, ra
 *	bl	callsite	(for modules, callsite is a trampoline)
 *
 * The module trampoline is as follows:
 *	lu12i.w	t1, callsite[31:12]
 *	lu32i.d	t1, callsite[51:32]
 *	lu52i.d	t1, t1, callsite[63:52]
 *	jirl	zero, t1, callsite[11:0] >> 2
 *
 * See arch/loongarch/kernel/ftrace_dyn.c for details. Note that the T series
 * regs are available and safe here because each C function follows
 * LoongArch's psABI as well.
 */

	.macro ftrace_regs_entry
	PTR_ADDI	sp, sp, -PT_SIZE
	PTR_S		t0, sp, PT_R1		/* Save parent ra at PT_R1(RA) */
	PTR_S		a0, sp, PT_R4
	PTR_S		a1, sp, PT_R5
	PTR_S		a2, sp, PT_R6
	PTR_S		a3, sp, PT_R7
	PTR_S		a4, sp, PT_R8
	PTR_S		a5, sp, PT_R9
	PTR_S		a6, sp, PT_R10
	PTR_S		a7, sp, PT_R11
	PTR_S		fp, sp, PT_R22
	PTR_S		ra, sp, PT_ERA		/* Save trace function ra at PT_ERA */
	PTR_ADDI	t8, sp, PT_SIZE
	PTR_S		t8, sp, PT_R3
	.endm

SYM_FUNC_START(ftrace_stub)
	jr	ra
SYM_FUNC_END(ftrace_stub)

SYM_CODE_START(ftrace_common)
	PTR_ADDI	a0, ra, -8	/* arg0: ip */
	move		a1, t0		/* arg1: parent_ip */
	la.pcrel	t1, function_trace_op
	PTR_L		a2, t1, 0	/* arg2: op */
	move		a3, sp		/* arg3: regs */

SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
	bl		ftrace_stub
	/*
	 * As we didn't use the S series regs in this assembly code and all
	 * calls are to C functions, which save the S series regs themselves,
	 * there is no need to restore them. The T series regs are available
	 * and safe at the callsite, so there is no need to restore them
	 * either.
	 */
ftrace_common_return:
	PTR_L		ra, sp, PT_R1
	PTR_L		a0, sp, PT_R4
	PTR_L		a1, sp, PT_R5
	PTR_L		a2, sp, PT_R6
	PTR_L		a3, sp, PT_R7
	PTR_L		a4, sp, PT_R8
	PTR_L		a5, sp, PT_R9
	PTR_L		a6, sp, PT_R10
	PTR_L		a7, sp, PT_R11
	PTR_L		fp, sp, PT_R22
	PTR_L		t0, sp, PT_ERA
	PTR_ADDI	sp, sp, PT_SIZE
	jr		t0
SYM_CODE_END(ftrace_common)

SYM_CODE_START(ftrace_caller)
	ftrace_regs_entry
	b		ftrace_common
SYM_CODE_END(ftrace_caller)
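
In C terms, the argument setup at the top of ftrace_common corresponds to
the following paraphrase (not part of the patch; live_ra and saved_t0 are
hypothetical names for the register values at entry):

	/*
	 * ra points just past the "bl", i.e. past both patched slots,
	 * so ra - 8 recovers rec->ip; t0 still holds the caller's ra.
	 */
	unsigned long ip        = live_ra - 2 * LOONGARCH_INSN_SIZE;	/* arg0 */
	unsigned long parent_ip = saved_t0;				/* arg1 */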
arch/loongarch/kernel/unwind_prologue.c
@@ -8,6 +8,16 @@
 #include <asm/ptrace.h>
 #include <asm/unwind.h>
 
+static inline void unwind_state_fixup(struct unwind_state *state)
+{
+#ifdef CONFIG_DYNAMIC_FTRACE
+	static unsigned long ftrace = (unsigned long)ftrace_call + 4;
+
+	if (state->pc == ftrace)
+		state->is_ftrace = true;
+#endif
+}
+
 unsigned long unwind_get_return_address(struct unwind_state *state)
 {
......
@@ -41,15 +51,30 @@ static bool unwind_by_guess(struct unwind_state *state)
 
 static bool unwind_by_prologue(struct unwind_state *state)
 {
-	struct stack_info *info = &state->stack_info;
-	union loongarch_instruction *ip, *ip_end;
 	long frame_ra = -1;
 	unsigned long frame_size = 0;
 	unsigned long size, offset, pc = state->pc;
+	struct pt_regs *regs;
+	struct stack_info *info = &state->stack_info;
+	union loongarch_instruction *ip, *ip_end;
 
 	if (state->sp >= info->end || state->sp < info->begin)
 		return false;
 
+	if (state->is_ftrace) {
+		/*
+		 * As we meet ftrace_regs_entry, reset the first flag as if we
+		 * were just starting to trace. Prologue analysis will stop
+		 * soon because PC is at the function entry.
+		 */
+		regs = (struct pt_regs *)state->sp;
+		state->first = true;
+		state->is_ftrace = false;
+		state->pc = regs->csr_era;
+		state->ra = regs->regs[1];
+		state->sp = regs->regs[3];
+		return true;
+	}
+
 	if (!kallsyms_lookup_size_offset(pc, &size, &offset))
 		return false;
......
@@ -95,7 +120,7 @@ static bool unwind_by_prologue(struct unwind_state *state)
 	state->pc = *(unsigned long *)(state->sp + frame_ra);
 	state->sp = state->sp + frame_size;
-	return !!__kernel_text_address(state->pc);
+	goto out;
 
 first:
 	state->first = false;
......
@@ -104,7 +129,9 @@ static bool unwind_by_prologue(struct unwind_state *state)
 	state->pc = state->ra;
 
-	return !!__kernel_text_address(state->ra);
+out:
+	unwind_state_fixup(state);
+	return !!__kernel_text_address(state->pc);
 }
 
 void unwind_start(struct unwind_state *state, struct task_struct *task,
......
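
Why ftrace_call + 4: the "bl" patched in at the ftrace_call label leaves
RA pointing at the next instruction, so a frame whose return address
equals ftrace_call + 4 must sit directly above the pt_regs block pushed
by ftrace_regs_entry. A sketch of the hand-off the fixup enables (a
restatement of the code above, not new logic):

	struct pt_regs *regs = (struct pt_regs *)state->sp;

	state->pc = regs->csr_era;	/* trace-time ra, saved at PT_ERA */
	state->ra = regs->regs[1];	/* parent ra, saved at PT_R1 */
	state->sp = regs->regs[3];	/* original sp, saved at PT_R3 */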