call_chain & dump_trace

最新推荐文章于 2022-10-05 23:47:11 发布

wlp600

最新推荐文章于 2022-10-05 23:47:11 发布

阅读量2.3k

点赞数

分类专栏：内核之旅文章标签： struct exception graph user thread fp

本文链接：https://blog.csdn.net/wlp600/article/details/6893680

版权

内核之旅专栏收录该内容

9 篇文章 2 订阅

订阅专栏

调用链是剖析工具中常备的一种显示方式，可以为用户呈现明确的函数调用关系，在perf中，可以根据调用链分析主函数的sample分布到了哪些子函数中。在内核调试中，根据调用链可以得到出错函数的上层调用者是谁。

调用链的实现其实很简单，就是遍历函数栈。在x86中，bp寄存器指向的内存位置存放的是旧栈帧基地址，这个位置之上(高地址处)是函数返回地址。因此在函数返回时，pop %ebp把旧栈帧基地址恢复到bp寄存器中，随后的ret(效果相当于pop %eip)把函数返回地址弹出到程序计数器中。在进入一个新函数时，call func会把函数返回地址压入栈，并跳转到func，这时就进入了一个新栈帧：push %ebp把函数调用者的栈帧基地址压栈，mov %esp, %ebp(AT&T语法，源操作数在前)让bp指向新栈帧的底部，此后sp就可以动态变化来调整栈空间大小了。

这么看来，新旧两个栈帧的边界处分别存放着函数返回地址和旧栈帧基地址：旧栈帧的顶部(低地址端)是函数返回地址，新栈帧的底部(高地址端，即bp所指的位置)是保存的旧栈帧基地址。因此在内核中定义栈帧：

/*
 * The form of the top of the frame on the stack.
 *
 * Two consecutive machine words read starting at a frame-pointer value:
 * the first member sits at the lower address, the second directly after it.
 */
struct stack_frame {
	struct stack_frame *next_frame;	/* frame pointer of the next frame to visit when walking the chain */
	unsigned long return_address;	/* return address stored one word above next_frame */
};

由于栈的增长是由高到低，所以高地址处是函数返回地址return_address，低地址处是保存的旧栈帧基地址next_frame。regs->bp指向当前栈帧的底部，也就是这个结构体的起始地址；如果从regs->bp处读取一个stack_frame结构体，就可以得到上下栈帧边界处的信息，包括函数返回地址以及下一个要遍历的栈帧(即调用者栈帧)的基地址bp。

/*
 * Build the call chain for the sample described by @regs.
 *
 * Obtains a callchain entry via get_callchain_entry(). Returns NULL when no
 * recursion context could be obtained (rctx == -1) or when no entry was
 * handed out; otherwise returns the filled entry. The context is released
 * with put_callchain_entry() on every path that obtained one.
 *
 * When @regs is not a user-mode register set, the kernel part of the chain
 * is recorded first and @regs is then replaced by task_pt_regs(current) if
 * the current task has an mm, or by NULL if it has none, in which case the
 * user part is skipped.
 */
static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
{
	struct perf_callchain_entry *chain;
	int ctx;

	chain = get_callchain_entry(&ctx);
	if (ctx == -1)
		return NULL;

	if (chain) {
		chain->nr = 0;

		if (!user_mode(regs)) {
			/* Kernel section: marker first, then the kernel frames. */
			perf_callchain_store(chain, PERF_CONTEXT_KERNEL);
			perf_callchain_kernel(chain, regs);

			/* Continue with the user register set only if there is an mm. */
			regs = current->mm ? task_pt_regs(current) : NULL;
		}

		if (regs) {
			/* User section: marker first, then the user frames. */
			perf_callchain_store(chain, PERF_CONTEXT_USER);
			perf_callchain_user(chain, regs);
		}
	}

	put_callchain_entry(ctx);
	return chain;
}

在perf_callchain()中，只需要一个pt_regs *参数，判定指令地址是否处于用户态user_mode(regs) => return !!(regs->cs & 3)，对于内核态地址，要遍历内核态栈，perf_callchain_kernel()实际就是dump_trace()，x86_64有三种内核栈：process stack、interrupt stack、severe exception (double fault, nmi, stack fault, debug, mce) hardware stack。

/*
 * x86-64 can have up to three kernel stacks:
 * process stack
 * interrupt stack
 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
 *
 * dump_trace() walks them in turn, reporting through the callbacks in @ops.
 *
 * @task:  task whose stack is walked; NULL selects current.
 * @regs:  only forwarded to stack_frame() when @bp is 0.
 * @stack: address to start walking from; when NULL a start address is
 *         picked below.
 * @bp:    starting frame pointer; 0 means "derive it via stack_frame()".
 * @ops:   callbacks: ->stack() announces a stack by name, ->walk_stack()
 *         walks one stack and returns the updated bp.
 * @data:  opaque cookie handed back to every @ops callback.
 */
void dump_trace(struct task_struct *task, struct pt_regs *regs,
		unsigned long *stack, unsigned long bp,
		const struct stacktrace_ops *ops, void *data)
{
	/* get_cpu() here is paired with the put_cpu() at the end of the function. */
	const unsigned cpu = get_cpu();
	unsigned long *irq_stack_end = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
	unsigned used = 0;
	struct thread_info *tinfo;
	int graph = 0;
	unsigned long dummy;

	if (!task)	task = current;

	if (!stack) {
		/* Default: the address of a local, i.e. a spot in this function's own frame. */
		stack = &dummy;
		/* For another task, start from its saved thread.sp instead. */
		if (task && task != current)
			stack = (unsigned long *)task->thread.sp;
	}

	if (!bp)		bp = stack_frame(task, regs);
	/*
	 * Print function call entries in all stacks, starting at the
	 * current stack address. Each pass of the loop handles one exception
	 * stack or the IRQ stack and then follows the link stored near its end
	 * to the next stack; the loop ends when the address is on neither.
	 */
	tinfo = task_thread_info(task);
	for (;;) {
		char *id;
		unsigned long *estack_end;
		estack_end = in_exception_stack(cpu, (unsigned long)stack, &used, &id);

		if (estack_end) {
			/* A negative return from ->stack() stops the stack-switching loop. */
			if (ops->stack(data, id) < 0)	break;
			bp = ops->walk_stack(tinfo, stack, bp, ops, data, estack_end, &graph);
			ops->stack(data, "<EOE>");

			/* We link to the next stack via the second-to-last pointer (index -2 to end) in the exception stack: */
			stack = (unsigned long *) estack_end[-2];
			continue;
		}
		if (irq_stack_end) {
			unsigned long *irq_stack;
			/* Lower bound of the IRQ stack range, computed back from its end. */
			irq_stack = irq_stack_end - (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);

			if (in_irq_stack(stack, irq_stack, irq_stack_end)) {
				if (ops->stack(data, "IRQ") < 0)	break;
				bp = ops->walk_stack(tinfo, stack, bp, ops, data, irq_stack_end, &graph);
				/*
				 * We link to the next stack (which would be the process
				 * stack normally) the last pointer (index -1 to end) in the IRQ stack:
				 */
				stack = (unsigned long *) (irq_stack_end[-1]);
				bp = fixup_bp_irq_link(bp, stack, irq_stack, irq_stack_end);
				/* Cleared so that the IRQ stack branch is not taken a second time. */
				irq_stack_end = NULL;
				ops->stack(data, "EOI");
				continue;
			}
		}
		break;
	}

	/* This handles the process stack: */
	bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph);
	put_cpu();
}

其中ops->walk_stack()就是遍历内核栈的操作，在perf_event中是通过print_context_stack_bp()实现的。该函数并不是递归，而是在循环中沿着栈帧链(frame->next_frame)逐帧回溯，直到栈指针不再有效，或者取到的返回地址不再是内核代码段地址为止。

/*
 * Walk one stack by following the frame-pointer chain that starts at @bp.
 *
 * For each frame whose return-address slot passes valid_stack_ptr() and
 * whose return address passes __kernel_text_address(), the address is
 * reported through ops->address() (third argument 1) and then handed to
 * print_ftrace_graph_addr(). The walk stops at the first frame failing
 * either check.
 *
 * Returns the frame pointer at which the walk stopped, as an unsigned long.
 * @stack is not used by this walker.
 */
unsigned long
print_context_stack_bp(struct thread_info *tinfo,
		       unsigned long *stack, unsigned long bp,
		       const struct stacktrace_ops *ops, void *data,
		       unsigned long *end, int *graph)
{
	struct stack_frame *cur = (struct stack_frame *)bp;

	for (;;) {
		unsigned long *slot = &cur->return_address;
		unsigned long pc;

		if (!valid_stack_ptr(tinfo, slot, sizeof(*slot), end))
			break;

		pc = *slot;
		if (!__kernel_text_address(pc))
			break;

		ops->address(data, pc, 1);
		/* Step to the next frame before the ftrace-graph hook runs. */
		cur = cur->next_frame;
		print_ftrace_graph_addr(pc, data, ops, tinfo, graph);
	}

	return (unsigned long)cur;
}

在遍历完内核栈后，判断当前进程是否是内核线程(内核线程的task->mm == NULL)。对于一般进程，是通过系统调用、异常或中断进入内核栈的，这时可以用task_pt_regs()取得一组pt_regs，作为遍历用户态栈的起点：

/*
 * Yields a struct pt_regs pointer one element below (tsk)->thread.sp0:
 * the cast to (struct pt_regs *) applies to sp0 first, then 1 is subtracted
 * in units of sizeof(struct pt_regs).
 * NOTE(review): presumably sp0 is the top of the task's kernel stack, where
 * the user-mode registers are saved on kernel entry — confirm.
 */
#define task_pt_regs(tsk)	((struct pt_regs *)(tsk)->thread.sp0 - 1)

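然后通过perf_callchain_user()逐帧记录用户态栈信息。这里copy_from_user_nmi(&frame, fp, sizeof(frame));从bp寄存器指向的用户态内存地址处拷贝一个stack_frame结构体，据此沿着next_frame循环回溯用户态栈。循环在以下任一情况下结束：拷贝失败(读到的字节数不足sizeof(frame))、fp < regs->sp(帧指针低于当前栈顶，说明它不是一个合法的栈帧地址)，或者记录的条目数达到PERF_MAX_STACK_DEPTH。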

/*
 * Record the user-space part of a call chain into @entry.
 *
 * Stores regs->ip first, then follows the frame-pointer chain starting at
 * regs->bp. Each frame record is read with copy_from_user_nmi(). The walk
 * ends when the entry holds PERF_MAX_STACK_DEPTH items, when a frame record
 * cannot be read in full, or when the frame pointer is below regs->sp.
 */
void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
	const void __user *fp = (void __user *)regs->bp;
	struct stack_frame frame;

	perf_callchain_store(entry, regs->ip);

	for (; entry->nr < PERF_MAX_STACK_DEPTH; fp = frame.next_frame) {
		unsigned long copied;

		/* Reset the record before each read. */
		frame.next_frame = NULL;
		frame.return_address = 0;

		copied = copy_from_user_nmi(&frame, fp, sizeof(frame));
		if (copied != sizeof(frame))
			break;

		/* A frame pointer below the stack pointer ends the walk. */
		if ((unsigned long)fp < regs->sp)
			break;

		perf_callchain_store(entry, frame.return_address);
	}
}

wlp600

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
call_chain & dump_trace

调用链是剖析工具中常备的一种显示方式，可以为用户呈现明确的函数调用关系，在perf中，可以根据调用链分析主函数的sample分布到了哪些子函数中。在内核调试中，根据调用链可以得到出错函数的上层调用者是谁。调用链的实现其实很简单，就是遍历函数栈，在x86中，bp寄存器指向的内存位置存放的是旧栈帧基地址，这个位置之上(高地址处)是函数返回地址，因此在函数返回时pop %ebp，就是把旧栈帧地址置放
复制链接

扫一扫