1. Abstract
.
2. Introduction
我们开发的时候很痛苦的一件事情就是在进程进入深睡眠以后,没有调试手段,往往这种问题还都是一些很难解的概率性难题,而此时又不知道这个进程到底在什么地方深睡眠了。
针对此问题,我开发了相应的驱动(请参考文章后附录的完整源代码)和应用 tool ,大家使用该方法就能够将一个进程的函数调用关系打印出来,于是假设一个进程深睡眠了,大家就可以得知它到底死在什么地方了。
3. 定位工具使用方法
3.1 准备
使用之前要先加载驱动和设备节点:
insmod getstack.ko
mknod /dev/getstack c 251 0
注:上面命令中的 251 只是示例。请先通过 cat /proc/devices 命令查看 getstack 在你的板子上实际对应的字符设备 Major 号,再用该 Major 号执行 mknod。
3.2 使用方法
我开发了一个应用程序 tool ,大家可以直接调用,方法如下:
./getstack --help
Usage: [options] pid/name
Options:
-p |--pid Print the stack of process with process's id.
-n |--name Print the stack of process with process's name.
-a |--all Print the stack of all processes.
--daemon Fork and go into the background.
--quiet Do not print anything.
--help Print this help message.
如果要看某个进程当前的内核函数调用堆栈,可以用 ps 得到其 pid ,然后调用 ./getstack -p pid 即可。
如果要查看所有进程的内核函数调用堆栈,调用 ./getstack -a 即可。
-n 我试了一下还不行,当前还没有跟踪原因,因为 -n 可以被 -p 取代,所以我也不想再跟踪了。
4. 原理分析
4.1 定位工具tool
这个工具的原理很简单,就是调用驱动的相应接口,辅助的代码都被我省掉了:
……
/*
 * Close the descriptor held in gs_fd and mark it invalid.
 *
 * If gs_fd is negative, prints a diagnostic and returns without doing
 * anything. Otherwise calls close(), reports the descriptor and the
 * close() result, and resets gs_fd to -1 so that a second call takes the
 * early-return path.
 */
void getstack_finalize()
{
    int gs_result;

    if (gs_fd < 0) {
        printf("fd is not opened!\n");
        return;
    }

    gs_result = close(gs_fd);
    printf("getstack_finalize want to close fd: %d, and result is: %d\n",
           gs_fd, gs_result);
    gs_fd = -1;
}
/*
 * Issue the PRINT_ALL_TASK ioctl on gs_fd (no argument is passed).
 *
 * Returns 0 on success, or -1 after printing a diagnostic when ioctl()
 * reports an error.
 */
int getstack_showall(void)
{
    if (ioctl(gs_fd, PRINT_ALL_TASK, NULL) < 0) {
        printf("ioctl PRINT_STACK\n");
        return -1;
    }
    return 0;
}
/*
 * Issue the PRINT_PID_TASK ioctl on gs_fd, passing the address of pid.
 *
 * pid: process id to hand to the driver.
 *
 * Returns 0 on success, or -1 after printing a diagnostic when ioctl()
 * reports an error.
 */
int getstack_pid(int pid)
{
    if (ioctl(gs_fd, PRINT_PID_TASK, &pid) < 0) {
        printf("ioctl PRINT_STACK\n");
        return -1;
    }
    return 0;
}
/*
 * Issue the PRINT_PNAME_TASK ioctl on gs_fd, passing pname as the argument.
 *
 * pname: NUL-terminated process name handed to the driver.
 *
 * Returns 0 on success, or -1 after printing a diagnostic when ioctl()
 * reports an error.
 */
int getstack_pname(char * pname)
{
    if (ioctl(gs_fd, PRINT_PNAME_TASK, pname) < 0) {
        printf("ioctl PRINT_STACK\n");
        return -1;
    }
    return 0;
}
/*
 * Entry point of the getstack tool.
 *
 * The option parsing is elided by the author (the "……" line below); it is
 * assumed to set daemon, quiet, pid, pname and all — confirm against the
 * full tool source.
 *
 * After optional daemonizing / console silencing, exactly one request is
 * sent to the driver, in priority order: by pid, by name, or all tasks.
 */
int main(int argc, char *argv[])
{
……
    if (daemon)
        daemonize();
    if (quiet)
        console_quiet();

    getstack_init();

    if (pid > 0)
        getstack_pid(pid);
    /*
     * The original tested "pid > 0" a second time here, which made the
     * by-name request unreachable. Select it when a non-empty name was
     * given instead (NOTE(review): assumes pname is a char pointer or
     * char array — confirm against the elided declarations).
     */
    else if (pname && pname[0] != '\0')
        getstack_pname(pname);
    else if (all)
        getstack_showall();

    getstack_finalize();
    return 0;
}
4.2 驱动
驱动的关键代码如下,其它部分请参考我的完整源代码,在 drivers/char/getstack.c 里:
static void show_task_stack(struct task_struct *p)
{
unsigned state;
state = p->state ? __ffs(p->state) + 1 : 0;
printk("%-13.13s %c", p->comm,
state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
#if (BITS_PER_LONG == 32)
if (state == TASK_RUNNING)
printk(" running ");
else
printk(" %08lX ", thread_saved_pc(p));
#else
if (state == TASK_RUNNING)
printk(" running task ");
else
printk(" %016lx ", thread_saved_pc(p));
#endif
if (state != TASK_RUNNING)
show_stack(p, NULL);
}
static int getstack_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg)
{
struct task_struct *g, *p;
int pid;
char pname[100];
printk("getstack_ioctl cmd: %0x/n", cmd);
switch (cmd) {
case PRINT_ALL_TASK:
{
read_lock(&tasklist_lock);
do_each_thread(g, p) {
show_task_stack(p);
msleep(1000);/* Flush the uart buffer */
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
return 0;
}
case PRINT_PID_TASK:
{
if (copy_from_user(&pid, (int *)arg, sizeof(int)))
return -EFAULT;
read_lock(&tasklist_lock);
do_each_thread(g, p) {
if(p->tgid == pid)
show_task_stack(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
return 0;
}
case PRINT_PNAME_TASK:
{
if (strncpy_from_user(pname, (char *)arg, strlen_user((char *)arg)))
return -EFAULT;
read_lock(&tasklist_lock);
do_each_thread(g, p) {
if(!strcmp(p->comm, pname))
show_task_stack(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
return 0;
}
default:
printk ( "getstack_ioctl: cmd=0x%x/n", cmd);
return -EINVAL;
}
/* Keep the compiler happy */
return 0;
}
4.3 内核原理
内核的原理其实和应用程序的 back trace 也是一样的,首先是取出该进程的内核栈的栈顶位置,然后把堆栈向上到栈底的地址和符号表比较,比上了就认为是一个函数;同时,因为 ARM 提供了 fp 寄存器,故而不用把所有的地址全都拿来比较,而只要把堆栈中的 fp 链拿过来比较就可以了:
/*
 * c_backtrace (excerpt): walk the chain of saved frame pointers starting
 * from the fp passed in r0 and call dump_backtrace_entry for each frame.
 * frame, mask, next, offset and save are presumably register aliases
 * defined outside this excerpt. Without CONFIG_FRAME_POINTER or
 * CONFIG_PRINTK the routine returns immediately.
 */
ENTRY(c_backtrace)
#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
		mov	pc, lr
#else
		stmfd	sp!, {r4 - r8, lr}	@ Save an extra register so we have a location...
		tst	r1, #0x10		@ 26 or 32-bit?
		moveq	mask, #0xfc000003
		movne	mask, #0
		tst	mask, r0		/* r0 is the fp to start the back trace from; mask flags an fp that ends the walk */
		movne	r0, #0
		movs	frame, r0		/* from here on, frame holds the current fp */
1:		moveq	r0, #-2
		ldmeqfd	sp!, {r4 - r8, pc}	/* already at the last frame: return */
2:		stmfd	sp!, {pc}		@ calculate offset of PC in STMIA instruction
		ldr	r0, [sp], #4
		adr	r1, 2b - 4
		sub	offset, r0, r1		/* the distance between the stored pc and the current instruction differs between ARM CPUs, so it is measured here at run time */
3:		tst	frame, mask		@ Check for address exceptions...
		bne	1b
1001:		ldr	next, [frame, #-12]	@ get fp /* the previous fp, i.e. the fp of the function that called this one */
1002:		ldr	r2, [frame, #-4]	@ get lr
1003:		ldr	r3, [frame, #0]		@ get pc /* the saved pc slot */
		sub	save, r3, offset	@ Correct PC for prefetching /* yields the pc that actually belongs to the function */
		bic	save, save, mask
1004:		ldr	r1, [save, #0]		@ get instruction at function
		mov	r1, r1, lsr #10
		ldr	r3, .Ldsi+4
		teq	r1, r3
		subeq	save, save, #4
		mov	r0, save
		bic	r1, r2, mask
		/*
		 * dump_backtrace_entry is a C function. Its arguments are the
		 * function address (the pc computed above) and lr, the return
		 * address. It is not analysed further here; its main job is to
		 * look up the kernel symbol matching the address passed in.
		 * Note that the kernel itself also has a module symbol table.
		 */
		bl	dump_backtrace_entry
		ldr	r0, [frame, #-8]	@ get sp
		sub	r0, r0, #4
1005:		ldr	r1, [save, #4]		@ get instruction at function+4
		mov	r3, r1, lsr #10
		/*
		 * Author's note: the purpose of .Ldsi was not understood; it
		 * looked like exception handling that lets this call recover
		 * and return.
		 * NOTE(review): in the visible code the word loaded from .Ldsi
		 * is only compared (teq) against the fetched instruction word
		 * shifted right by 10 bits.
		 */
		ldr	r2, .Ldsi+4
		teq	r3, r2			@ Check for stmia sp!, {args}
		addeq	save, save, #4		@ next instruction
		bleq	.Ldumpstm
		sub	r0, frame, #16
1006:		ldr	r1, [save, #4]		@ Get 'stmia sp!, {rlist, fp, ip, lr, pc}' instruction
		mov	r3, r1, lsr #10
		ldr	r2, .Ldsi
		teq	r3, r2
		bleq	.Ldumpstm
		/*
		 * A zero next framepointer means we're done.
		 */
		teq	next, #0
		ldmeqfd	sp!, {r4 - r8, pc}	/* finished: return */
		/*
		 * The next framepointer must be above the
		 * current framepointer.
		 */
		cmp	next, frame
		mov	frame, next		/* move on to the next frame */
		bhi	3b
		b	1007f
5. 附录
5.1 T 卡拷贝DW 时的堆栈打印信息
如下是我在 T 卡拷贝 DW 时抓下的信息,因为后来系统可以恢复,我不确定是否和我们上周跟踪的问题属于同一个现象:
1449 root 1980 DW cp media media-2 -af
~/test_case # ./getstack -p 1449
[ 3587.220000] getstack_open entered
[ 3587.230000] getstack_ioctl cmd: 7390
[ 3587.230000] show_task_stack entered
[ 3587.230000] cp D C022F3F8 [<c022f0a0>] (schedule+0x0/0x6e0) from [<c0230218>] (io_schedule+0x34/0x58)
[ 3587.240000] [<c02301e4>] (io_schedule+0x0/0x58) from [<c00e50e0>] (sync_buffer+0x4c/0x54)
[ 3587.250000] r4 = C6EE795C
[ 3587.250000] [<c00e5094>] (sync_buffer+0x0/0x54) from [<c023072c>] (__wait_on_bit+0x9c/0xc0)
[ 3587.260000] [<c0230690>] (__wait_on_bit+0x0/0xc0) from [<c02307c8>] (out_of_line_wait_on_bit+0x78/0x84)
[ 3587.270000] [<c0230750>] (out_of_line_wait_on_bit+0x0/0x84) from [<c00e500c>] (__wait_on_buffer+0x28/0x30)
[ 3587.280000] r7 = 00000000 r6 = C6EE7A28 r5 = 00000723 r4 = C09E14A8
[ 3587.290000] [<c00e4fe4>] (__wait_on_buffer+0x0/0x30) from [<c00e6c54>] (__bread+0xc4/0xfc)
[ 3587.300000] [<c00e6b90>] (__bread+0x0/0xfc) from [<bf0b6370>] (fat_ent_bread+0x54/0xb8 [fat])
[ 3587.310000] r5 = 00000723 r4 = C7C5D200
[ 3587.310000] [<bf0b631c>] (fat_ent_bread+0x0/0xb8 [fat]) from [<bf0b5a74>] (fat_ent_read+0x150/0x1cc [fat])
[ 3587.320000] r8 = C7C5D200 r7 = 00037F80 r6 = C6EE7A28 r5 = BF0BD9C8
[ 3587.330000] r4 = 00000000
[ 3587.330000] [<bf0b5924>] (fat_ent_read+0x0/0x1cc [fat]) from [<bf0b23d4>] (fat_get_cluster+0x1b4/0x2f0 [fat])
[ 3587.340000] [<bf0b2220>] (fat_get_cluster+0x0/0x2f0 [fat]) from [<bf0b260c>] (fat_bmap+0xfc/0x18c [fat])
[ 3587.350000] [<bf0b2510>] (fat_bmap+0x0/0x18c [fat]) from [<bf0b78c8>] (fat_get_block+0x58/0x298 [fat])
[ 3587.360000] [<bf0b7870>] (fat_get_block+0x0/0x298 [fat]) from [<c00ed848>] (do_mpage_readpage+0x414/0x66c)
[ 3587.370000] [<c00ed434>] (do_mpage_readpage+0x0/0x66c) from [<c00ee17c>] (mpage_readpages+0x78/0x164)
[ 3587.380000] [<c00ee104>] (mpage_readpages+0x0/0x164) from [<bf0b7b44>] (fat_readpages+0x20/0x28 [fat])
[ 3587.390000] [<bf0b7b24>] (fat_readpages+0x0/0x28 [fat]) from [<c00a43bc>] (__do_page_cache_readahead+0x21c/0x348)
[ 3587.400000] [<c00a41a0>] (__do_page_cache_readahead+0x0/0x348) from [<c00a455c>] (blockable_page_cache_readahead+0x74/0xd4)
[ 3587.410000] [<c00a44e8>] (blockable_page_cache_readahead+0x0/0xd4) from [<c00a4644>] (make_ahead_window+0x88/0xb8)
[ 3587.420000] r8 = C0DDC158 r7 = 00000000 r6 = C33F8DC0 r5 = 00000000
[ 3587.420000] r4 = C6EE7DD8
[ 3587.430000] [<c00a45bc>] (make_ahead_window+0x0/0xb8) from [<c00a472c>] (page_cache_readahead+0xb8/0x1ec)
[ 3587.440000] r6 = 00000020 r5 = 00000002 r4 = C6EE7DD8
[ 3587.440000] [<c00a4674>] (page_cache_readahead+0x0/0x1ec) from [<c009d168>] (do_generic_mapping_read+0x584/0x5c0)
[ 3587.450000] [<c009cbe4>] (do_generic_mapping_read+0x0/0x5c0) from [<c009f6b8>] (generic_file_aio_read+0x110/0x214)
[ 3587.460000] [<c009f5a8>] (generic_file_aio_read+0x0/0x214) from [<c00bd758>] (do_sync_read+0xc4/0x108)
[ 3587.470000] [<c00bd694>] (do_sync_read+0x0/0x108) from [<c00be1a4>] (vfs_read+0xdc/0x178)
[ 3587.480000] r8 = C0059F24 r7 = 00002000 r6 = C6EE7F78 r5 = BEBD24F0
[ 3587.490000] r4 = C33F8DC0
[ 3587.490000] [<c00be0c8>] (vfs_read+0x0/0x178) from [<c00be628>] (sys_read+0x44/0x74)
[ 3587.500000] r7 = 00000000 r6 = 0153C000 r5 = FFFFFFF7 r4 = C33F8DC0
[ 3587.500000] [<c00be5e4>] (sys_read+0x0/0x74) from [<c0059da0>] (ret_fast_syscall+0x0/0x2c)
[ 3587.510000] r7 = 00000003 r6 = 00000004 r5 = BEBD24F0 r4 = 00002000
5.2 用PS 命令看在哪个函数里
用如下的命令也可以查看睡眠的进程在哪个函数里,其处理其实也是根据 /proc/kallsyms 来处理的。调用方法如下:
/ps -o pid,tty,addr,wchan,fname -p 1498
但是此命令只能看一层,其实意义不大,比如说如上命令看到的是:
PID TT ADDR WCHAN COMMAND
1498 ? - select wpa_supp
同时, busybox 里的 ps 不支持这些参数,要下载 PC 上的 ps 并编译运行, PC 上的 PS 可以从如下网址下载:
http://procps.sourceforge.net/
编译后将如下两个文件拷贝到设备上:
cp proc/libproc-3.2.8.so /usr/lib
cp ps/ps /usr/bin/
6. 附录:驱动源代码
/*
* Author: Huang Gao
* huanggao@gmail.com
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/cdev.h>
#include <asm/io.h>
#include <asm/system.h>
#include <asm/uaccess.h>
/* Driver state: wraps the character-device object registered by this module. */
struct getstack_dev
{
struct cdev cdev;
};
/* The single device instance used by the init and cleanup routines. */
struct getstack_dev dev;
/* Major number of the device; -1 until getstack_init stores the allocated one. */
static int getstack_major = -1;
/*
 * ioctl command numbers, all built with _IO() on the magic character 's'.
 * The ioctl handler reads a user-space int through arg for PRINT_PID_TASK
 * and a user-space string for PRINT_PNAME_TASK; PRINT_ALL_TASK takes no
 * argument.
 */
#define GETSTACK_IOCTL 's'
#define PRINT_ALL_TASK _IO(GETSTACK_IOCTL, 0x8b)
#define PRINT_PID_TASK _IO(GETSTACK_IOCTL, 0x90)
#define PRINT_PNAME_TASK _IO(GETSTACK_IOCTL, 0x91)
/*
 * read() handler: logs the call and transfers nothing.
 *
 * Always returns 0; buf, count and ppos are not used.
 */
static ssize_t getstack_read (struct file * file, char __user * buf, size_t count,
			      loff_t * ppos)
{
	printk("getstack_read entered\n");
	return 0;
}
/*
 * write() handler: logs the call and consumes nothing.
 *
 * Always returns 0; buf, count and ppos are not used.
 */
static ssize_t getstack_write (struct file * file, const char __user * buf,
			       size_t count, loff_t * ppos)
{
	printk("getstack_write entered\n");
	return 0;
}
/* One-letter task state codes, indexed by the state index computed below. */
static const char stat_nam[] = "RSDTtZX";

/*
 * Print one summary line for task p and, unless it is running, its stack.
 *
 * The state index is 0 when p->state is zero and __ffs(p->state) + 1
 * otherwise; it selects a letter from stat_nam ('?' when the index is
 * outside the table). A task whose index equals TASK_RUNNING only gets a
 * "running" marker; any other task gets thread_saved_pc(p) printed
 * followed by show_stack(p, NULL).
 */
static void show_task_stack(struct task_struct *p)
{
	unsigned state;

	printk("show_task_stack entered\n");

	state = p->state ? __ffs(p->state) + 1 : 0;
	printk("%-13.13s %c", p->comm,
	       state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');

	/* Column width of the saved pc follows the word size. */
#if (BITS_PER_LONG == 32)
	if (state == TASK_RUNNING)
		printk(" running ");
	else
		printk(" %08lX ", thread_saved_pc(p));
#else
	if (state == TASK_RUNNING)
		printk(" running task ");
	else
		printk(" %016lx ", thread_saved_pc(p));
#endif

	if (state != TASK_RUNNING)
		show_stack(p, NULL);
}
static int getstack_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg)
{
struct task_struct *g, *p;
int pid;
char pname[100];
printk("getstack_ioctl cmd: %0x/n", cmd);
switch (cmd) {
case PRINT_ALL_TASK:
{
read_lock(&tasklist_lock);
do_each_thread(g, p) {
show_task_stack(p);
msleep(1000);/* Flush the uart buffer */
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
return 0;
}
case PRINT_PID_TASK:
{
if (copy_from_user(&pid, (int *)arg, sizeof(int)))
return -EFAULT;
read_lock(&tasklist_lock);
do_each_thread(g, p) {
if(p->tgid == pid)
show_task_stack(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
return 0;
}
case PRINT_PNAME_TASK:
{
if (strncpy_from_user(pname, (char *)arg, strlen_user((char *)arg)))
return -EFAULT;
read_lock(&tasklist_lock);
do_each_thread(g, p) {
if(!strcmp(p->comm, pname))
show_task_stack(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
return 0;
}
default:
printk ( "getstack_ioctl: cmd=0x%x/n", cmd);
return -EINVAL;
}
/* Keep the compiler happy */
return 0;
}
/*
 * open() handler: logs the call; no per-open state is set up.
 *
 * Always returns 0.
 */
static int getstack_open (struct inode * inode, struct file * file)
{
	printk("getstack_open entered\n");
	return 0;
}
/*
 * release() handler: logs the call; there is nothing to tear down.
 *
 * Always returns 0.
 */
static int getstack_release (struct inode * inode, struct file * file)
{
	printk("getstack_release entered\n");
	return 0;
}
/*
 * File operations of the getstack character device. read, write, open and
 * release only log the call; the actual work is done through .ioctl.
 */
static const struct file_operations getstack_fops = {
.owner = THIS_MODULE,
.read = getstack_read,
.write = getstack_write,
.ioctl = getstack_ioctl,
.open = getstack_open,
.release = getstack_release,
};
/*
 * Module init: allocate one character-device number named "getstack" and
 * register the cdev for it.
 *
 * Returns 0 on success. On failure returns the error reported by
 * alloc_chrdev_region() or cdev_add(); in the latter case the allocated
 * device number is released again so nothing stays registered.
 */
static int __init getstack_init (void)
{
	dev_t devno;
	int err;

	printk("getstack_init entered\n");

	err = alloc_chrdev_region(&devno, 0, 1, "getstack");
	if (err < 0)
		return err;
	/* Only read devno after the allocation is known to have succeeded. */
	getstack_major = MAJOR(devno);

	/* cdev_init() already stores the fops pointer in the cdev. */
	cdev_init(&dev.cdev, &getstack_fops);
	dev.cdev.owner = THIS_MODULE;

	err = cdev_add(&dev.cdev, devno, 1);
	if (err) {
		printk(KERN_NOTICE "getstack: error %d adding cdev\n", err);
		/* Undo the region allocation instead of loading a dead module. */
		unregister_chrdev_region(devno, 1);
		getstack_major = -1;
		return err;
	}

	return 0;
}
/*
 * Module exit: remove the cdev and release the device number obtained at
 * init time (major stored in getstack_major, minor 0, one device).
 */
static void __exit getstack_cleanup (void)
{
	printk("getstack_cleanup entered\n");
	cdev_del(&dev.cdev);
	unregister_chrdev_region(MKDEV(getstack_major, 0), 1);
}
/* Register the module entry/exit points and its metadata. */
module_init(getstack_init);
module_exit(getstack_cleanup);
MODULE_AUTHOR("Huang Gao <huanggao@gmail.com>");
MODULE_LICENSE("GPL");