kprobe源码分析

Cc又菜又帅

已于 2024-01-26 11:14:03 修改

阅读量162

点赞数 1

分类专栏： Linux内核文章标签： linux

于 2023-09-10 11:41:16 首次发布

本文链接：https://blog.csdn.net/weixin_48450161/article/details/132789328

版权

Linux内核专栏收录该内容

6 篇文章 0 订阅

订阅专栏

kprobe是什么

kprobe 是一种动态调试机制，用于debugging，动态跟踪，性能分析，动态修改内核行为等，2004年由IBM发布，是名为Dprobes工具集的底层实现机制[1][2]，2005年合入Linux kernel。probe的含义是像一个探针，可以在不修改分析对象源码的情况下，获取Kernel的运行时信息。

kprobe的实现原理是把指定地址（探测点）的指令替换成一个可以让cpu进入debug模式的指令，使执行路径暂停，跳转到probe 处理函数后收集、修改信息，再跳转回来继续执行。

X86中使用的是int3指令，ARM64中使用的是BRK指令进入debug monitor模式。

在这里插入图片描述

这种指令替换机制使得kprobe可以在大部分kernel的代码段插入探测点，除了一些分支类的指令如br、exception、eret等。另外kprobe模块本身的代码不能probe。在pre_handler和post_handler中可以访问、修改所有寄存器和全局变量，其变体jprobe可以检查传入参数，kretprobe可以检查返回值。且不需要修改探测目标的源码，方便用于生产系统的debug、性能分析、log记录等。

kprobe的注册

在这里插入图片描述

kprobe的触发和处理

/*
 * Table of debug exception handlers. hook_debug_fault_code() selects an
 * entry of this table by index and overwrites its fn/sig/code/name fields.
 *
 * Each initializer lists four values: a handler function, a signal number,
 * a signal code and a human-readable name.
 * NOTE(review): struct fault_info is declared elsewhere; the field order is
 * presumably fn, sig, code, name to match these positional initializers -
 * confirm against the struct definition.
 *
 * Every entry starts out with do_bad as its handler except the
 * "aarch64 BRK" entry, which starts out with early_brk64.
 */
static struct fault_info __refdata debug_fault_info[] = {
	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware breakpoint"	},
	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware single-step"	},
	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware watchpoint"	},
	{ do_bad,	SIGKILL,	SI_KERNEL,	"unknown 3"		},
	{ do_bad,	SIGTRAP,	TRAP_BRKPT,	"aarch32 BKPT"		},
	{ do_bad,	SIGKILL,	SI_KERNEL,	"aarch32 vector catch"	},
	{ early_brk64,	SIGTRAP,	TRAP_BRKPT,	"aarch64 BRK"		},
	{ do_bad,	SIGKILL,	SI_KERNEL,	"unknown 7"		},
};

/*
 * hook_debug_fault_code - replace one entry of debug_fault_info[].
 * @nr:   index of the entry to replace; must lie inside the table
 * @fn:   handler to store in the entry
 * @sig:  signal number to store in the entry
 * @code: signal code to store in the entry
 * @name: descriptive name to store in the entry
 *
 * Triggers BUG_ON() when @nr is negative or past the end of the table.
 */
void __init hook_debug_fault_code(int nr,
				  int (*fn)(unsigned long, unsigned int, struct pt_regs *),
				  int sig, int code, const char *name)
{
	struct fault_info *slot;

	/* Validate the index before touching the table. */
	BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info));

	slot = &debug_fault_info[nr];
	slot->fn = fn;
	slot->sig = sig;
	slot->code = code;
	slot->name = name;
}

/*
 * Install the debug exception handlers:
 *   - single-step exceptions (DBG_ESR_EVT_HWSS) -> single_step_handler
 *   - BRK instruction exceptions (DBG_ESR_EVT_BRK) -> brk_handler
 *
 * Always returns 0.
 */
static int __init debug_traps_init(void)
{
	hook_debug_fault_code(DBG_ESR_EVT_HWSS,
			      single_step_handler,
			      SIGTRAP, TRAP_TRACE,
			      "single-step handler");
	hook_debug_fault_code(DBG_ESR_EVT_BRK,
			      brk_handler,
			      SIGTRAP, TRAP_BRKPT,
			      "ptrace BRK handler");

	return 0;
}

在这里插入图片描述

point

1.被替换的指令放在哪里？

slot page，使用了module_alloc分配可以执行的内存页

2.一个探测点被多个probe注册时怎么处理

aggrprobe

3.SMP、中断、抢占时可能有kprobe重入，如何处理

实现了reenter检查机制，允许probe嵌套

4.kprobe的性能

break指令导致CPU执行停止，时间开销较大
x86实现了优化机制，使用jmp指令替换int3 这种break指令，速度提升10倍；ARM64中未实现。

5.kprobe实现方式

之前的实现方式：通过gcc -pg在每个函数入口插入对mcount的调用，Linux启动时把这些调用替换为nop指令，需要probe时再替换回来。这种方式只能在函数入口处做trace；现在的方案可以实现任意指令的trace。

添加kprobe驱动

/* samples/kprobes/kprobe_example.c */

/*
 * NOTE: The upstream example works on x86 and powerpc; the handlers shown
 * here only print register state when CONFIG_ARM64 is set.
 * Here's a sample kernel module showing the use of kprobes to dump a
 * stack trace and selected registers when _do_fork() is called.
 *
 * For more information on theory of operation of kprobes, see
 * Documentation/kprobes.txt
 *
 * You will see the trace data in /var/log/messages and on the console
 * whenever _do_fork() is invoked to create a new process.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

/* Size in bytes of the buffer that holds the name of the probed symbol. */
#define MAX_SYMBOL_LEN	64
/*
 * Name of the symbol to probe. Defaults to "_do_fork" and is exposed as the
 * "symbol" module parameter (permissions 0644), bounded by the buffer size.
 */
static char symbol[MAX_SYMBOL_LEN] = "_do_fork";
module_param_string(symbol, symbol, sizeof(symbol), 0644);

/* For each probe you need to allocate a kprobe structure */
/* This one identifies its target by name, through the symbol buffer above. */
static struct kprobe kp = {
	.symbol_name	= symbol,
};

/*
 * kprobe pre_handler: called just before the probed instruction is executed.
 *
 * When built with CONFIG_ARM64, logs the probed symbol name, the probe
 * address, and the pc and pstate values taken from @regs.
 * Always returns 0.
 */
static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
#ifdef CONFIG_ARM64
	pr_info("<%s> pre_handler: p->addr = 0x%p, pc = 0x%lx, pstate = 0x%lx\n",
		p->symbol_name,
		p->addr,
		(long)regs->pc,
		(long)regs->pstate);
#endif

	/* A dump_stack() here will give a stack backtrace */
	return 0;
}

/*
 * kprobe post_handler: called after the probed instruction is executed.
 *
 * When built with CONFIG_ARM64, logs the probed symbol name, the probe
 * address and the pstate value taken from @regs. @flags is not used.
 */
static void handler_post(struct kprobe *p, struct pt_regs *regs,
				unsigned long flags)
{
#ifdef CONFIG_ARM64
	pr_info("<%s> post_handler: p->addr = 0x%p, pstate = 0x%lx\n",
		p->symbol_name,
		p->addr,
		(long)regs->pstate);
#endif
}

/*
 * fault_handler: this is called if an exception is generated for any
 * instruction within the pre- or post-handler, or when Kprobes
 * single-steps the probed instruction.
 *
 * Logs the probe address and the trap number, then returns 0 to report
 * that the fault was not handled here.
 */
static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
{
	/* The message must end in a real newline ("\n"), not a literal 'n'. */
	pr_info("fault_handler: p->addr = 0x%p, trap #%d\n", p->addr, trapnr);
	/* Return 0 because we don't handle the fault. */
	return 0;
}

/*
 * Module init: attach the three handlers to kp and register the probe.
 *
 * Returns 0 when register_kprobe() succeeds; otherwise logs the error and
 * returns the negative value that register_kprobe() reported.
 */
static int __init kprobe_init(void)
{
	int err;

	kp.pre_handler = handler_pre;
	kp.post_handler = handler_post;
	kp.fault_handler = handler_fault;

	err = register_kprobe(&kp);
	if (err >= 0) {
		pr_info("Planted kprobe at %p\n", kp.addr);
		return 0;
	}

	pr_err("register_kprobe failed, returned %d\n", err);
	return err;
}

/* Module exit: unregister the probe, then log the address stored in kp. */
static void __exit kprobe_exit(void)
{
	unregister_kprobe(&kp);
	pr_info("kprobe at %p unregistered\n", kp.addr);
}

/*
 * Name kprobe_init and kprobe_exit as the module's init and exit functions
 * and declare the module license as GPL.
 */
module_init(kprobe_init)
module_exit(kprobe_exit)
MODULE_LICENSE("GPL");

在ftrace中使用kprobe

 p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS]  : Set a probe
 r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS]  : Set a return probe
 p[:[GRP/]EVENT] [MOD:]SYM[+0]%return [FETCHARGS]       : Set a return probe
 -:[GRP/]EVENT                                         : Clear a probe

GRP            : Group name. If omitted, use "kprobes" for it.
EVENT          : Event name. If omitted, the event name is generated
                 based on SYM+offs or MEMADDR.
MOD            : Module name which has given SYM.
SYM[+offs]     : Symbol+offset where the probe is inserted.
SYM%return     : Return address of the symbol
MEMADDR        : Address where the probe is inserted.
MAXACTIVE      : Maximum number of instances of the specified function that
                 can be probed simultaneously, or 0 for the default value
                 as defined in Documentation/trace/kprobes.rst section 1.3.1.

FETCHARGS      : Arguments. Each probe can have up to 128 args.
 %REG          : Fetch register REG
 @ADDR         : Fetch memory at ADDR (ADDR should be in kernel)
 @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol)
 $stackN       : Fetch Nth entry of stack (N >= 0)
 $stack        : Fetch stack address.
 $argN         : Fetch the Nth function argument. (N >= 1) (*1)
 $retval       : Fetch return value.(*2)
 $comm         : Fetch current task comm.
 +|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(*3)(*4)
 \IMM          : Store an immediate value to the argument.
 NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
 FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
                 (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
                 (x8/x16/x32/x64), "string", "ustring" and bitfield
                 are supported.

 (*1) only for the probe on function entry (offs == 0).
 (*2) only for return probe.
 (*3) this is useful for fetching a field of data structures.
 (*4) "u" means user-space dereference. See the "User Memory Access" section of the kprobetrace documentation.

例：
如果想trace __set_task_comm函数，并且打印出来更新后线程的名字

/* buf is the thread's new name */
void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)

cd /sys/kernel/debug/tracing

# Define probe "myprobe" on __set_task_comm with four fetch arguments:
#   task_struct : value of register x0
#   name        : string read from the address held in register x1
#   exec        : value of register x2 (exec is a bool passed by value, so it
#                 is read from the register rather than dereferenced)
#   comm        : comm of the current task
# NOTE(review): assumes the ARM64 calling convention, where the first three
# arguments (tsk, buf, exec) arrive in x0, x1 and x2.
echo 'p:myprobe __set_task_comm task_struct=%x0 name=+0(%x1):string exec=%x2 comm=$comm' > kprobe_events

# Enable the probe event.
echo 1 > events/kprobes/myprobe/enable

# Print the trace buffer.
cat trace

# Disable the probe event, then write an empty line to kprobe_events to
# remove the probe definitions.
echo 0 > events/kprobes/myprobe/enable
echo > kprobe_events

跟踪谁使用了高精度定时器，并打印调用栈

# Pause tracing and disable the myprobe event before redefining it.
# NOTE(review): the second command presumably fails harmlessly if no probe
# named myprobe currently exists - confirm on the target system.
echo 0 > tracing_on
echo 0 > events/kprobes/myprobe/enable

# Define "myprobe" on hrtimer_start_range_ns, recording register x1 as a
# signed 64-bit value named ktime together with the current task's comm.
echo 'p:myprobe hrtimer_start_range_ns  ktime=%x1:s64  comm=$comm' > kprobe_events
# Turn on the "stacktrace" trace option.
echo stacktrace > trace_options

# Resume tracing and enable the probe event.
echo 1 > tracing_on
echo 1 > events/kprobes/myprobe/enable

参考资料

主要参考文章
- https://www.cnblogs.com/hpyu/p/14257305.html
ftrace kprobe使用说明
- https://docs.kernel.org/trace/kprobetrace.html
- https://blog.csdn.net/jasonactions/article/details/122299418

Cc又菜又帅

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
2
评论
kprobe源码分析

kprobe 是一种动态调试机制，用于debugging，动态跟踪，性能分析，动态修改内核行为等，2004年由IBM发布，是名为Dprobes工具集的底层实现机制[1][2]，2005年合入Linux kernel。probe的含义是像一个探针，可以不修改分析对象源码的情况下，获取Kernel的运行时信息。kprobe的实现原理是把指定地址（探测点）的指令替换成一个可以让cpu进入debug模式的指令，使执行路径暂停，跳转到probe 处理函数后收集、修改信息，再跳转回来继续执行。
复制链接

扫一扫