实现函数调用堆栈跟踪函数
我们需要在 lab1 中完成 kdebug.c 中函数 print_stackframe 的实现,可以通过函数 print_stackframe 来跟踪函数调用堆栈中记录的返回地址。在如果能够正确实现此函数,可在 lab1 中执行 “make qemu”后,在 qemu 模拟器中得到类似如下的输出:
……
ebp:0x00007b28 eip:0x00100992 args:0x00010094 0x00010094 0x00007b58 0x00100096
kern/debug/kdebug.c:305: print_stackframe+22
ebp:0x00007b38 eip:0x00100c79 args:0x00000000 0x00000000 0x00000000 0x00007ba8
kern/debug/kmonitor.c:125: mon_backtrace+10
ebp:0x00007b58 eip:0x00100096 args:0x00000000 0x00007b80 0xffff0000 0x00007b84
kern/init/init.c:48: grade_backtrace2+33
ebp:0x00007b78 eip:0x001000bf args:0x00000000 0xffff0000 0x00007ba4 0x00000029
kern/init/init.c:53: grade_backtrace1+38
ebp:0x00007b98 eip:0x001000dd args:0x00000000 0x00100000 0xffff0000 0x0000001d
kern/init/init.c:58: grade_backtrace0+23
ebp:0x00007bb8 eip:0x00100102 args:0x0010353c 0x00103520 0x00001308 0x00000000
kern/init/init.c:63: grade_backtrace+34
ebp:0x00007be8 eip:0x00100059 args:0x00000000 0x00000000 0x00000000 0x00007c53
kern/init/init.c:28: kern_init+88
ebp:0x00007bf8 eip:0x00007d73 args:0xc031fcfa 0xc08ed88e 0x64e4d08e 0xfa7502a8
<unknow>: -- 0x00007d72 –
……
完成实验,看输出结果是否与上述显示大致一致,并解释最后一行各个数值的含义。
kdebug.c原始文件
#include <defs.h>
#include <x86.h>
#include <stab.h>
#include <stdio.h>
#include <string.h>
#include <kdebug.h>
#define STACKFRAME_DEPTH 20
extern const struct stab __STAB_BEGIN__[]; // beginning of stabs table
extern const struct stab __STAB_END__[]; // end of stabs table
extern const char __STABSTR_BEGIN__[]; // beginning of string table
extern const char __STABSTR_END__[]; // end of string table
/* debug information about a particular instruction pointer */
struct eipdebuginfo {
const char *eip_file; // source code filename for eip
int eip_line; // source code line number for eip
const char *eip_fn_name; // name of function containing eip
int eip_fn_namelen; // length of function's name
uintptr_t eip_fn_addr; // start address of function
int eip_fn_narg; // number of function arguments
};
/* *
* stab_binsearch - according to the input, the initial value of
* range [*@region_left, *@region_right], find a single stab entry
* that includes the address @addr and matches the type @type,
* and then save its boundary to the locations that pointed
* by @region_left and @region_right.
*
* Some stab types are arranged in increasing order by instruction address.
* For example, N_FUN stabs (stab entries with n_type == N_FUN), which
* mark functions, and N_SO stabs, which mark source files.
*
* Given an instruction address, this function finds the single stab entry
* of type @type that contains that address.
*
* The search takes place within the range [*@region_left, *@region_right].
* Thus, to search an entire set of N stabs, you might do:
*
* left = 0;
* right = N - 1; (rightmost stab)
* stab_binsearch(stabs, &left, &right, type, addr);
*
* The search modifies *region_left and *region_right to bracket the @addr.
* *@region_left points to the matching stab that contains @addr,
* and *@region_right points just before the next stab.
* If *@region_left > *region_right, then @addr is not contained in any
* matching stab.
*
* For example, given these N_SO stabs:
* Index Type Address
* 0 SO f0100000
* 13 SO f0100040
* 117 SO f0100176
* 118 SO f0100178
* 555 SO f0100652
* 556 SO f0100654
* 657 SO f0100849
* this code:
* left = 0, right = 657;
* stab_binsearch(stabs, &left, &right, N_SO, 0xf0100184);
* will exit setting left = 118, right = 554.
* */
static void
stab_binsearch(const struct stab *stabs, int *region_left, int *region_right,
int type, uintptr_t addr) {
int l = *region_left, r = *region_right, any_matches = 0;
while (l <= r) {
int true_m = (l + r) / 2, m = true_m;
// search for earliest stab with right type
while (m >= l && stabs[m].n_type != type) {
m --;
}
if (m < l) { // no match in [l, m]
l = true_m + 1;
continue;
}
// actual binary search
any_matches = 1;
if (stabs[m].n_value < addr) {
*region_left = m;
l = true_m + 1;
} else if (stabs[m].n_value > addr) {
*region_right = m - 1;
r = m - 1;
} else {
// exact match for 'addr', but continue loop to find
// *region_right
*region_left = m;
l = m;
addr ++;
}
}
if (!any_matches) {
*region_right = *region_left - 1;
}
else {
// find rightmost region containing 'addr'
l = *region_right;
for (; l > *region_left && stabs[l].n_type != type; l --)
/* do nothing */;
*region_left = l;
}
}
/* *
* debuginfo_eip - Fill in the @info structure with information about
* the specified instruction address, @addr. Returns 0 if information
* was found, and negative if not. But even if it returns negative it
* has stored some information into '*info'.
* */
int
debuginfo_eip(uintptr_t addr, struct eipdebuginfo *info) {
const struct stab *stabs, *stab_end;
const char *stabstr, *stabstr_end;
info->eip_file = "<unknown>";
info->eip_line = 0;
info->eip_fn_name = "<unknown>";
info->eip_fn_namelen = 9;
info->eip_fn_addr = addr;
info->eip_fn_narg = 0;
stabs = __STAB_BEGIN__;
stab_end = __STAB_END__;
stabstr = __STABSTR_BEGIN__;
stabstr_end = __STABSTR_END__;
// String table validity checks
if (stabstr_end <= stabstr || stabstr_end[-1] != 0) {
return -1;
}
// Now we find the right stabs that define the function containing
// 'eip'. First, we find the basic source file containing 'eip'.
// Then, we look in that source file for the function. Then we look
// for the line number.
// Search the entire set of stabs for the source file (type N_SO).
int lfile = 0, rfile = (stab_end - stabs) - 1;
stab_binsearch(stabs, &lfile, &rfile, N_SO, addr);
if (lfile == 0)
return -1;
// Search within that file's stabs for the function definition
// (N_FUN).
int lfun = lfile, rfun = rfile;
int lline, rline;
stab_binsearch(stabs, &lfun, &rfun, N_FUN, addr);
if (lfun <= rfun) {
// stabs[lfun] points to the function name
// in the string table, but check bounds just in case.
if (stabs[lfun].n_strx < stabstr_end - stabstr) {
info->eip_fn_name = stabstr + stabs[lfun].n_strx;
}
info->eip_fn_addr = stabs[lfun].n_value;
addr -= info->eip_fn_addr;
// Search within the function definition for the line number.
lline = lfun;
rline = rfun;
} else {
// Couldn't find function stab! Maybe we're in an assembly
// file. Search the whole file for the line number.
info->eip_fn_addr = addr;
lline = lfile;
rline = rfile;
}
info->eip_fn_namelen = strfind(info->eip_fn_name, ':') - info->eip_fn_name;
// Search within [lline, rline] for the line number stab.
// If found, set info->eip_line to the right line number.
// If not found, return -1.
stab_binsearch(stabs, &lline, &rline, N_SLINE, addr);
if (lline <= rline) {
info->eip_line = stabs[rline].n_desc;
} else {
return -1;
}
// Search backwards from the line number for the relevant filename stab.
// We can't just use the "lfile" stab because inlined functions
// can interpolate code from a different file!
// Such included source files use the N_SOL stab type.
while (lline >= lfile
&& stabs[lline].n_type != N_SOL
&& (stabs[lline].n_type != N_SO || !stabs[lline].n_value)) {
lline --;
}
if (lline >= lfile && stabs[lline].n_strx < stabstr_end - stabstr) {
info->eip_file = stabstr + stabs[lline].n_strx;
}
// Set eip_fn_narg to the number of arguments taken by the function,
// or 0 if there was no containing function.
if (lfun < rfun) {
for (lline = lfun + 1;
lline < rfun && stabs[lline].n_type == N_PSYM;
lline ++) {
info->eip_fn_narg ++;
}
}
return 0;
}
/* *
* print_kerninfo - print the information about kernel, including the location
* of kernel entry, the start addresses of data and text segements, the start
* address of free memory and how many memory that kernel has used.
* */
void
print_kerninfo(void) {
extern char etext[], edata[], end[], kern_init[];
cprintf("Special kernel symbols:\n");
cprintf(" entry 0x%08x (phys)\n", kern_init);
cprintf(" etext 0x%08x (phys)\n", etext);
cprintf(" edata 0x%08x (phys)\n", edata);
cprintf(" end 0x%08x (phys)\n", end);
cprintf("Kernel executable memory footprint: %dKB\n", (end - kern_init + 1023)/1024);
}
/* *
* print_debuginfo - read and print the stat information for the address @eip,
* and info.eip_fn_addr should be the first address of the related function.
* */
void
print_debuginfo(uintptr_t eip) {
struct eipdebuginfo info;
if (debuginfo_eip(eip, &info) != 0) {
cprintf(" <unknow>: -- 0x%08x --\n", eip);
}
else {
char fnname[256];
int j;
for (j = 0; j < info.eip_fn_namelen; j ++) {
fnname[j] = info.eip_fn_name[j];
}
fnname[j] = '\0';
cprintf(" %s:%d: %s+%d\n", info.eip_file, info.eip_line,
fnname, eip - info.eip_fn_addr);
}
}
static __noinline uint32_t
read_eip(void) {
uint32_t eip;
asm volatile("movl 4(%%ebp), %0" : "=r" (eip));
return eip;
}
/* *
* print_stackframe - print a list of the saved eip values from the nested 'call'
* instructions that led to the current point of execution
*
* The x86 stack pointer, namely esp, points to the lowest location on the stack
* that is currently in use. Everything below that location in stack is free. Pushing
* a value onto the stack will invole decreasing the stack pointer and then writing
* the value to the place that stack pointer pointes to. And popping a value do the
* opposite.
*
* The ebp (base pointer) register, in contrast, is associated with the stack
* primarily by software convention. On entry to a C function, the function's
* prologue code normally saves the previous function's base pointer by pushing
* it onto the stack, and then copies the current esp value into ebp for the duration
* of the function. If all the functions in a program obey this convention,
* then at any given point during the program's execution, it is possible to trace
* back through the stack by following the chain of saved ebp pointers and determining
* exactly what nested sequence of function calls caused this particular point in the
* program to be reached. This capability can be particularly useful, for example,
* when a particular function causes an assert failure or panic because bad arguments
* were passed to it, but you aren't sure who passed the bad arguments. A stack
* backtrace lets you find the offending function.
*
* The inline function read_ebp() can tell us the value of current ebp. And the
* non-inline function read_eip() is useful, it can read the value of current eip,
* since while calling this function, read_eip() can read the caller's eip from
* stack easily.
*
* In print_debuginfo(), the function debuginfo_eip() can get enough information about
* calling-chain. Finally print_stackframe() will trace and print them for debugging.
*
* Note that, the length of ebp-chain is limited. In boot/bootasm.S, before jumping
* to the kernel entry, the value of ebp has been set to zero, that's the boundary.
* */
void
print_stackframe(void) {
/* LAB1 YOUR CODE : STEP 1 */
/* (1) call read_ebp() to get the value of ebp. the type is (uint32_t);
* (2) call read_eip() to get the value of eip. the type is (uint32_t);
* (3) from 0 .. STACKFRAME_DEPTH
* (3.1) printf value of ebp, eip
* (3.2) (uint32_t)calling arguments [0..4] = the contents in address (unit32_t)ebp +2 [0..4]
* (3.3) cprintf("\n");
* (3.4) call print_debuginfo(eip-1) to print the C calling function name and line number, etc.
* (3.5) popup a calling stackframe
* NOTICE: the calling funciton's return addr eip = ss:[ebp+4]
* the calling funciton's ebp = ss:[ebp]
*/
uint32_t ebp = read_ebp(), eip = read_eip();
int i, j;
for (i = 0; ebp != 0 && i < STACKFRAME_DEPTH; i ++) {
cprintf("ebp:0x%08x eip:0x%08x args:", ebp, eip);
uint32_t *args = (uint32_t *)ebp + 2;
for (j = 0; j < 4; j ++) {
cprintf("0x%08x ", args[j]);
}
cprintf("\n");
print_debuginfo(eip - 1);
eip = ((uint32_t *)ebp)[1];
ebp = ((uint32_t *)ebp)[0];
}
}
- 首先定义两个局部变量ebp、esp分别存放ebp、esp寄存器的值。这里将ebp定义为指针,是为了方便后面取ebp寄存器的值。
- 调用read_ebp函数来获取执行print_stackframe函数时ebp寄存器的值,这里read_ebp必须定义为inline函数,否则获取的是执行read_ebp函数时的ebp寄存器的值。
- 调用read_eip函数来获取当前指令的位置,也就是此时eip寄存器的值。这里read_eip必须定义为常规函数而不是inline函数,因为这样的话在调用read_eip时会把当前指令的下一条指令的地址(也就是eip寄存器的值)压栈,那么在进入read_eip函数内部后便可以从栈中获取到调用前eip寄存器的值。
- 由于变量eip存放的是下一条指令的地址,因此将变量eip的值减去1,得到的指令地址就属于当前指令的范围了。由于只要输入的地址属于当前指令的起始和结束位置之间,print_debuginfo都能搜索到当前指令,因此这里减去1即可。
- 以后变量eip的值就不能再调用read_eip来获取了(每次调用获取的值都是相同的),而应该从ebp寄存器指向栈中的位置再往上一个单位中获取。
- 由于ebp寄存器指向栈中的位置存放的是调用者的ebp寄存器的值,据此可以继续顺藤摸瓜,不断回溯,直到ebp寄存器的值变为0
代码完善如下:
uint32_t ebp=read_ebp();
uint32_t eip=read_eip();
int i;
for(i=0;i<STACKFRAME_DEPTH&&ebp!=0;i++)
{
//(3) from 0 .. STACKFRAME_DEPTH
cprintf("ebp:0x%08x eip:0x%08x ",ebp,eip);//(3.1)printf value of ebp, eip
uint32_t *args=(uint32_t *)ebp+2;//此时args指向了ss:[ebp+8]的位置 此处存放着参数
cprintf("arg :0x%08x 0x%08x 0x%08x 0x%08x\n",*(args+0),* (args+1),*(args+2),*(args+3));/
print_debuginfo(eip-1); //打印eip以及ebp相关的信息
eip=((uint32_t *)ebp+1);//此时eip指向了返回地址
ebp=((uint32_t *)ebp+0);//ebp指向了原ebp的位置
运行结果:
解释最后一行各个参数的含义
最后一行是ebp:0x00007bf8 eip:0x00007d6e args:0xc031fcfa 0xc08ed88e 0x64e4d08e 0xfa7502a8,共有ebp,eip和args三类参数,下面分别给出解释。
ebp:0x0007bf8:
此时ebp的值是kern_init函数的栈顶地址,从obj/bootblock.asm文件中知道整个栈的栈顶地址为0x00007c00,ebp指向的栈位置存放调用者的ebp寄存器的值,ebp+4指向的栈位置存放返回地址的值,这意味着kern_init函数的调用者(也就是bootmain函数)没有传递任何输入参数给它!因为单是存放旧的ebp、返回地址已经占用8字节了。
eip:0x00007d6e:
eip的值是kern_init函数的返回地址,也就是bootmain函数调用kern_init对应的指令的下一条指令的地址。这与obj/bootblock.asm是相符合的。
7d6c: ff d0 call *%eax
7d6e: ba 00 8a ff ff mov $0xffff8a00,%edx
args:0xc031fcfa 0xc08ed88e 0x64e4d08e 0xfa7502a8
一般来说,args存放的4个dword是对应4个输入参数的值。但这里比较特殊,由于bootmain函数调用kern_init并没传递任何输入参数,并且栈顶的位置恰好在boot loader第一条指令存放的地址的上面,而args恰好是kern_int的ebp寄存器指向的栈顶往上第2~5个单元,因此args存放的就是boot loader指令的前16个字节!可以对比obj/bootblock.asm文件来验证(验证时要注意系统是小端字节序)。
00007c00 :
7c00: fa cli
7c01: fc cld
7c02: 31 c0 xor %eax,%eax
7c04: 8e d8 mov %eax,%ds
7c06: 8e c0 mov %eax,%es
7c08: 8e d0 mov %eax,%ss
7c0a: e4 64 in $0x64,%al
7c0c: a8 02 test $0x2,%al
7c0e: 75 fa jne 7c0a <seta20.1>
其对应的是第一个使用堆栈的函数,bootmain.c中的bootmain。(因为此时ebp对应地址的值为0)
bootloader设置的堆栈从0x7c00开始,使用”call bootmain”转入bootmain函数。
call指令压栈,所以bootmain中ebp为0x7bf8。