Commit 6cdda814, authored by Prasanna S. Panchamukhi, committed by Linus Torvalds

[PATCH] kprobes: kprobes ported to x86_64

Adapted from the i386 architecture.

Kprobes:

Helps developers to trap at almost any kernel code address, specifying a
handler routine to be invoked when the breakpoint is hit.  Useful for
analysing the Linux kernel by collecting debugging information
non-disruptively.  Employs single-stepping out-of-line to avoid probe
misses on SMP and may be especially useful in aiding debugging elusive
races and problems on live systems.  More elaborate dynamic tracing tools
can be built over the kprobes interface.

Sample usage:
	To place a probe on __blockdev_direct_IO:
	static int probe_handler(struct kprobe *p, struct pt_regs *regs)
	{
		... whatever ...
		return 0;
	}
	struct kprobe kp = {
		.addr = (kprobe_opcode_t *) __blockdev_direct_IO,
		.pre_handler = probe_handler
	};
	register_kprobe(&kp);

Jprobes:

A special kprobe type which can be placed on function entry points, and
employs a simple mirroring principle to allow seamless access to the
arguments of a function being probed.  The probe handler routine should
have the same prototype as the function being probed.

The way it works is that when the probe is hit, the breakpoint handler
simply irets to the probe handler's rip while retaining register and stack
state corresponding to the function entry.  After it is done, the probe
handler calls jprobe_return() which traps again to restore processor state
and switch back to the probed function.  Linus noted correctly at KS that
we need to be careful as gcc assumes that the callee owns arguments.  We
save and restore enough stack bytes to cover argument space.

Sample Usage:
	static int jip_queue_xmit(struct sk_buff *skb, int ipfragok)
	{
		... whatever ...
		jprobe_return();
		return 0;
	}

	struct jprobe jp = {
		{.addr = (kprobe_opcode_t *) ip_queue_xmit},
		.entry = (kprobe_opcode_t *) jip_queue_xmit
	};
	register_jprobe(&jp);

Signed-off-by: Prasanna S Panchamukhi <prasanna@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 3d6921be
...@@ -33,6 +33,16 @@ config IOMMU_DEBUG ...@@ -33,6 +33,16 @@ config IOMMU_DEBUG
options. See Documentation/x86_64/boot-options.txt for more options. See Documentation/x86_64/boot-options.txt for more
details. details.
config KPROBES
bool "Kprobes"
depends on DEBUG_KERNEL
help
Kprobes allows you to trap at almost any kernel address and
execute a callback function. register_kprobe() establishes
a probepoint and specifies the callback. Kprobes is useful
for kernel debugging, non-intrusive instrumentation and testing.
If in doubt, say "N".
config IOMMU_LEAK config IOMMU_LEAK
bool "IOMMU leak tracing" bool "IOMMU leak tracing"
depends on DEBUG_KERNEL depends on DEBUG_KERNEL
......
...@@ -26,6 +26,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o ...@@ -26,6 +26,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o
obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_SWIOTLB) += swiotlb.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_MODULES) += module.o
......
This diff is collapsed.
...@@ -47,6 +47,7 @@ ...@@ -47,6 +47,7 @@
#include <linux/irq.h> #include <linux/irq.h>
extern struct gate_struct idt_table[256]; extern struct gate_struct idt_table[256];
asmlinkage void divide_error(void); asmlinkage void divide_error(void);
...@@ -73,6 +74,17 @@ asmlinkage void spurious_interrupt_bug(void); ...@@ -73,6 +74,17 @@ asmlinkage void spurious_interrupt_bug(void);
asmlinkage void call_debug(void); asmlinkage void call_debug(void);
struct notifier_block *die_chain; struct notifier_block *die_chain;
static spinlock_t die_notifier_lock = SPIN_LOCK_UNLOCKED;
/*
 * register_die_notifier - add a notifier block to die_chain.
 * @nb: notifier block to register.
 *
 * The chain update is performed under die_notifier_lock with local
 * interrupts saved/disabled for the duration of the call.
 *
 * Returns whatever notifier_chain_register() returns.
 */
int register_die_notifier(struct notifier_block *nb)
{
	unsigned long irq_state;
	int rc;

	spin_lock_irqsave(&die_notifier_lock, irq_state);
	rc = notifier_chain_register(&die_chain, nb);
	spin_unlock_irqrestore(&die_notifier_lock, irq_state);

	return rc;
}
static inline void conditional_sti(struct pt_regs *regs) static inline void conditional_sti(struct pt_regs *regs)
{ {
...@@ -475,7 +487,6 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ ...@@ -475,7 +487,6 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
} }
DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->rip) DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->rip)
DO_ERROR( 3, SIGTRAP, "int3", int3);
DO_ERROR( 4, SIGSEGV, "overflow", overflow) DO_ERROR( 4, SIGSEGV, "overflow", overflow)
DO_ERROR( 5, SIGSEGV, "bounds", bounds) DO_ERROR( 5, SIGSEGV, "bounds", bounds)
DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->rip) DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->rip)
...@@ -625,6 +636,15 @@ asmlinkage void default_do_nmi(struct pt_regs *regs) ...@@ -625,6 +636,15 @@ asmlinkage void default_do_nmi(struct pt_regs *regs)
inb(0x71); /* dummy */ inb(0x71); /* dummy */
} }
/*
 * do_int3 - C-level handler for the int3 (vector 3) exception.
 * @regs:       register state at the time of the trap.
 * @error_code: error code passed in by the exception entry code.
 *
 * Listeners on the die chain get the first look at the event via
 * notify_die(DIE_INT3, ...).  Only when none of them answers
 * NOTIFY_STOP is the trap passed on to do_trap() as a SIGTRAP.
 */
asmlinkage void do_int3(struct pt_regs * regs, long error_code)
{
	if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) != NOTIFY_STOP)
		do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
}
/* runs on IST stack. */ /* runs on IST stack. */
asmlinkage void *do_debug(struct pt_regs * regs, unsigned long error_code) asmlinkage void *do_debug(struct pt_regs * regs, unsigned long error_code)
{ {
...@@ -654,6 +674,10 @@ asmlinkage void *do_debug(struct pt_regs * regs, unsigned long error_code) ...@@ -654,6 +674,10 @@ asmlinkage void *do_debug(struct pt_regs * regs, unsigned long error_code)
asm("movq %%db6,%0" : "=r" (condition)); asm("movq %%db6,%0" : "=r" (condition));
if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
SIGTRAP) == NOTIFY_STOP) {
return regs;
}
conditional_sti(regs); conditional_sti(regs);
/* Mask out spurious debug traps due to lazy DR7 setting */ /* Mask out spurious debug traps due to lazy DR7 setting */
...@@ -886,8 +910,8 @@ void __init trap_init(void) ...@@ -886,8 +910,8 @@ void __init trap_init(void)
set_intr_gate(0,&divide_error); set_intr_gate(0,&divide_error);
set_intr_gate_ist(1,&debug,DEBUG_STACK); set_intr_gate_ist(1,&debug,DEBUG_STACK);
set_intr_gate_ist(2,&nmi,NMI_STACK); set_intr_gate_ist(2,&nmi,NMI_STACK);
set_system_gate(3,&int3); /* int3-5 can be called from all */ set_intr_gate(3,&int3);
set_system_gate(4,&overflow); set_system_gate(4,&overflow); /* int4-5 can be called from all */
set_system_gate(5,&bounds); set_system_gate(5,&bounds);
set_intr_gate(6,&invalid_op); set_intr_gate(6,&invalid_op);
set_intr_gate(7,&device_not_available); set_intr_gate(7,&device_not_available);
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include <asm/unistd.h> #include <asm/unistd.h>
#include <asm/delay.h> #include <asm/delay.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/kdebug.h>
extern spinlock_t rtc_lock; extern spinlock_t rtc_lock;
...@@ -197,6 +198,7 @@ EXPORT_SYMBOL(_atomic_dec_and_lock); ...@@ -197,6 +198,7 @@ EXPORT_SYMBOL(_atomic_dec_and_lock);
#endif #endif
EXPORT_SYMBOL(die_chain); EXPORT_SYMBOL(die_chain);
EXPORT_SYMBOL(register_die_notifier);
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
EXPORT_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(cpu_sibling_map);
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <linux/vt_kern.h> /* For unblank_screen() */ #include <linux/vt_kern.h> /* For unblank_screen() */
#include <linux/compiler.h> #include <linux/compiler.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/kprobes.h>
#include <asm/system.h> #include <asm/system.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
...@@ -32,6 +33,7 @@ ...@@ -32,6 +33,7 @@
#include <asm/proto.h> #include <asm/proto.h>
#include <asm/kdebug.h> #include <asm/kdebug.h>
#include <asm-generic/sections.h> #include <asm-generic/sections.h>
#include <asm/kdebug.h>
void bust_spinlocks(int yes) void bust_spinlocks(int yes)
{ {
...@@ -268,6 +270,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) ...@@ -268,6 +270,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
/* get the address */ /* get the address */
__asm__("movq %%cr2,%0":"=r" (address)); __asm__("movq %%cr2,%0":"=r" (address));
if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
SIGSEGV) == NOTIFY_STOP)
return;
if (likely(regs->eflags & X86_EFLAGS_IF)) if (likely(regs->eflags & X86_EFLAGS_IF))
local_irq_enable(); local_irq_enable();
......
...@@ -16,8 +16,8 @@ struct die_args { ...@@ -16,8 +16,8 @@ struct die_args {
/* Note - you should never unregister because that can race with NMIs. /* Note - you should never unregister because that can race with NMIs.
If you really want to do it first unregister - then synchronize_kernel - then free. If you really want to do it first unregister - then synchronize_kernel - then free.
*/ */
int register_die_notifier(struct notifier_block *nb);
extern struct notifier_block *die_chain; extern struct notifier_block *die_chain;
/* Grossly misnamed. */ /* Grossly misnamed. */
enum die_val { enum die_val {
DIE_OOPS = 1, DIE_OOPS = 1,
...@@ -32,6 +32,7 @@ enum die_val { ...@@ -32,6 +32,7 @@ enum die_val {
DIE_GPF, DIE_GPF,
DIE_CALL, DIE_CALL,
DIE_NMI_IPI, DIE_NMI_IPI,
DIE_PAGE_FAULT,
}; };
static inline int notify_die(enum die_val val,char *str,struct pt_regs *regs,long err,int trap, int sig) static inline int notify_die(enum die_val val,char *str,struct pt_regs *regs,long err,int trap, int sig)
......
#ifndef _ASM_KPROBES_H
#define _ASM_KPROBES_H
/*
* Kernel Probes (KProbes)
* include/asm-x86_64/kprobes.h
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2002, 2004
*
* 2004-Oct Prasanna S Panchamukhi <prasanna@in.ibm.com> and Jim Keniston
* <kenistoj@us.ibm.com> adapted from i386.
*/
#include <linux/types.h>
#include <linux/ptrace.h>
struct pt_regs;
/* Unit in which probed instruction bytes are stored and addressed. */
typedef u8 kprobe_opcode_t;
/* Opcode written at a probe address; 0xcc is the one-byte x86 int3. */
#define BREAKPOINT_INSTRUCTION 0xcc
/*
 * Size, in kprobe_opcode_t units, reserved for one instruction.
 * Matches the architectural 15-byte limit on x86 instruction length.
 */
#define MAX_INSN_SIZE 15
/* Cap, in bytes, on the value MIN_STACK_SIZE() can evaluate to. */
#define MAX_STACK_SIZE 64
/*
 * MIN_STACK_SIZE(ADDR) - byte count from ADDR up to
 * current_thread_info() + THREAD_SIZE, clamped to MAX_STACK_SIZE.
 *
 * ADDR must be an integer address.  Both ADDR and current_thread_info()
 * may be evaluated more than once, so do not pass an expression with
 * side effects.
 */
#define MIN_STACK_SIZE(ADDR) \
	((((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR)) > (MAX_STACK_SIZE) \
	 ? (MAX_STACK_SIZE) \
	 : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR)))
/* Architecture-specific copy of the original (probed) instruction. */
struct arch_specific_insn {
/*
 * Points to the copy of the original instruction.
 * NOTE(review): presumably this is the buffer that gets single-stepped
 * out of line, as the commit description mentions; the code that
 * allocates and fills it is not visible here - confirm in kprobes.c.
 */
kprobe_opcode_t *insn;
};
/*
 * Traps 3 and 1 are entered through interrupt gates for the benefit of
 * kprobes.  Before control is passed on to the original int3/debug trap
 * handler, re-enable interrupts if the interrupted context (as recorded
 * in the saved eflags) had them enabled.
 */
static inline void restore_interrupts(struct pt_regs *regs)
{
	/* IF was clear in the trapped context: leave interrupts off. */
	if (!(regs->eflags & IF_MASK))
		return;

	local_irq_enable();
}
/*
 * Entry points of the arch kprobes implementation.  Their definitions are
 * not visible in this header.
 * NOTE(review): presumably they live in arch/x86_64/kernel/kprobes.c - confirm.
 */
extern int post_kprobe_handler(struct pt_regs *regs);
extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
extern int kprobe_handler(struct pt_regs *regs);
/*
 * Takes the (self, val, data) argument list of a notifier callback.
 * NOTE(review): presumably this is what gets hooked onto die_chain - confirm.
 */
extern int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
#endif /* _ASM_KPROBES_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment