kprobe源码分析

kprobe是什么

kprobe 是一种动态调试机制,用于debugging,动态跟踪,性能分析,动态修改内核行为等,2004年由IBM发布,是名为Dprobes工具集的底层实现机制[1][2],2005年合入Linux kernel。probe的含义是像一个探针,可以不修改分析对象源码的情况下,获取Kernel的运行时信息。

kprobe的实现原理是把指定地址(探测点)的指令替换成一个可以让cpu进入debug模式的指令,使执行路径暂停,跳转到probe 处理函数后收集、修改信息,再跳转回来继续执行。

X86中使用的是int3指令,ARM64中使用的是BRK指令进入debug monitor模式。

在这里插入图片描述

这种指令替换机制使得kprobe可以在大部分kernel的代码段插入探测点,除了一些分支类的指令如br、exception、eret等。另外kprobe模块本身的代码不能probe。在pre_handler和post_handler中可以访问、修改所有寄存器和全局变量,其变体jprobe可以检查传入参数,kretprobe可以检查返回值。且不需要修改探测目标的源码,方便用于生产系统的debug、性能分析、log记录等。

kprobe的注册

在这里插入图片描述

kprobe的触发和处理

static struct fault_info __refdata debug_fault_info[] = {
	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware breakpoint"	},
	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware single-step"	},
	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware watchpoint"	},
	{ do_bad,	SIGKILL,	SI_KERNEL,	"unknown 3"		},
	{ do_bad,	SIGTRAP,	TRAP_BRKPT,	"aarch32 BKPT"		},
	{ do_bad,	SIGKILL,	SI_KERNEL,	"aarch32 vector catch"	},
	{ early_brk64,	SIGTRAP,	TRAP_BRKPT,	"aarch64 BRK"		},
	{ do_bad,	SIGKILL,	SI_KERNEL,	"unknown 7"		},
};

void __init hook_debug_fault_code(int nr,
				  int (*fn)(unsigned long, unsigned int, struct pt_regs *),
				  int sig, int code, const char *name)
{
	BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info));

	debug_fault_info[nr].fn		= fn;
	debug_fault_info[nr].sig	= sig;
	debug_fault_info[nr].code	= code;
	debug_fault_info[nr].name	= name;
}

// brk指令异常处理 -> brk_handler
// 单步调试异常处理 -> single_step_handler
static int __init debug_traps_init(void)
{
	hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP,
			      TRAP_TRACE, "single-step handler");
	hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP,
			      TRAP_BRKPT, "ptrace BRK handler");
	return 0;
}

在这里插入图片描述
在这里插入图片描述

point

1.被替换的指令放在哪里?

  • slot page,使用了module_alloc分配可以执行的内存页

2.一个探测点由多个probe注册怎么处理

  • aggrprobe

3.SMP、中断、抢占时可能有kprobe重入,如何处理

  • 实现了reenter检查机制,允许probe嵌套

4.kprobe的性能

  • break指令导致CPU执行停止,时间开销较大
  • x86实现了优化机制,使用jmp指令替换int3 这种break指令,速度提升10倍;ARM64中未实现。

5.kprobe实现方式

  • 之前的实现方式:通过gcc -pg可以向函数头插入mcount指令,linux启动的时候将mcount指令替换为nop指令,需要probe的时候再替换回来,这样只能实现函数头的trace,现在的方案可以实现任意指令的trace

添加kprobe驱动

/* samples/kprobes/kprobe_example.c */

/*
 * NOTE: This example is works on x86 and powerpc.
 * Here's a sample kernel module showing the use of kprobes to dump a
 * stack trace and selected registers when _do_fork() is called.
 *
 * For more information on theory of operation of kprobes, see
 * Documentation/kprobes.txt
 *
 * You will see the trace data in /var/log/messages and on the console
 * whenever _do_fork() is invoked to create a new process.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

#define MAX_SYMBOL_LEN	64
static char symbol[MAX_SYMBOL_LEN] = "_do_fork";
module_param_string(symbol, symbol, sizeof(symbol), 0644);

/* For each probe you need to allocate a kprobe structure */
static struct kprobe kp = {
	.symbol_name	= symbol,
};

/* kprobe pre_handler: called just before the probed instruction is executed */
static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
#ifdef CONFIG_ARM64
	pr_info("<%s> pre_handler: p->addr = 0x%p, pc = 0x%lx,"
			" pstate = 0x%lx\n",
		p->symbol_name, p->addr, (long)regs->pc, (long)regs->pstate);
#endif

	/* A dump_stack() here will give a stack backtrace */
	return 0;
}

/* kprobe post_handler: called after the probed instruction is executed */
static void handler_post(struct kprobe *p, struct pt_regs *regs,
				unsigned long flags)
{
#ifdef CONFIG_ARM64
	pr_info("<%s> post_handler: p->addr = 0x%p, pstate = 0x%lx\n",
		p->symbol_name, p->addr, (long)regs->pstate);
#endif
}

/*
 * fault_handler: this is called if an exception is generated for any
 * instruction within the pre- or post-handler, or when Kprobes
 * single-steps the probed instruction.
 */
static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
{
	pr_info("fault_handler: p->addr = 0x%p, trap #%dn", p->addr, trapnr);
	/* Return 0 because we don't handle the fault. */
	return 0;
}

static int __init kprobe_init(void)
{
	int ret;
	kp.pre_handler = handler_pre;
	kp.post_handler = handler_post;
	kp.fault_handler = handler_fault;

	ret = register_kprobe(&kp);
	if (ret < 0) {
		pr_err("register_kprobe failed, returned %d\n", ret);
		return ret;
	}
	pr_info("Planted kprobe at %p\n", kp.addr);
	return 0;
}

static void __exit kprobe_exit(void)
{
	unregister_kprobe(&kp);
	pr_info("kprobe at %p unregistered\n", kp.addr);
}

module_init(kprobe_init)
module_exit(kprobe_exit)
MODULE_LICENSE("GPL");

在ftrace中使用kprobe

 p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS]  : Set a probe
 r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS]  : Set a return probe
 p:[GRP/]EVENT] [MOD:]SYM[+0]%return [FETCHARGS]       : Set a return probe
 -:[GRP/]EVENT                                         : Clear a probe

GRP            : Group name. If omitted, use "kprobes" for it.
EVENT          : Event name. If omitted, the event name is generated
                 based on SYM+offs or MEMADDR.
MOD            : Module name which has given SYM.
SYM[+offs]     : Symbol+offset where the probe is inserted.
SYM%return     : Return address of the symbol
MEMADDR        : Address where the probe is inserted.
MAXACTIVE      : Maximum number of instances of the specified function that
                 can be probed simultaneously, or 0 for the default value
                 as defined in Documentation/trace/kprobes.rst section 1.3.1.

FETCHARGS      : Arguments. Each probe can have up to 128 args.
 %REG          : Fetch register REG
 @ADDR         : Fetch memory at ADDR (ADDR should be in kernel)
 @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol)
 $stackN       : Fetch Nth entry of stack (N >= 0)
 $stack        : Fetch stack address.
 $argN         : Fetch the Nth function argument. (N >= 1) (\*1)
 $retval       : Fetch return value.(\*2)
 $comm         : Fetch current task comm.
 +|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*3)(\*4)
 \IMM          : Store an immediate value to the argument.
 NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
 FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
                 (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
                 (x8/x16/x32/x64), "string", "ustring" and bitfield
                 are supported.

 (\*1) only for the probe on function entry (offs == 0).
 (\*2) only for return probe.
 (\*3) this is useful for fetching a field of data structures.
 (\*4) "u" means user-space dereference. See :ref:`user_mem_access`.

例:
如果想trace __set_task_comm函数,并且打印出来更新后线程的名字

/* buf是线程的新名字 */
void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
cd /sys/kernel/debug/tracing

echo 'p:myprobe __set_task_comm task_struct=%x0 name=+0(%x1):string exec=+0(%x2) comm=$comm' > kprobe_events

echo 1 > events/kprobes/myprobe/enable

cat trace

echo 0 > events/kprobes/myprobe/enable
echo > kprobe_events

跟踪谁使用了高精度定时器,并打印调用栈

echo 0 > tracing_on
echo 0 > events/kprobes/myprobe/enable

echo 'p:myprobe hrtimer_start_range_ns  ktime=%x1:s64  comm=$comm' > kprobe_events
echo stacktrace > trace_options

echo 1 > tracing_on
echo 1 > events/kprobes/myprobe/enable

参考资料

  • 主要参考文章
    • https://www.cnblogs.com/hpyu/p/14257305.html
  • ftrace kprobe使用说明
    • https://docs.kernel.org/trace/kprobetrace.html
    • https://blog.csdn.net/jasonactions/article/details/122299418
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值