转载_如何得知深睡眠等各类进程的函数堆栈 -- DW-SW等

1.               Abstract

.

2.               Introduction

我们开发的时候很痛苦的一件事情就是在进程进入深睡眠以后,没有调试手段,往往这种问题还都是一些很难解的概率性难题,而此时又不知道这个进程到底在什么地方深睡眠了。

针对此问题,我开发了相应的驱动(请参考文章后附录的完整源代码)和应用 tool ,大家使用该方法就能够将一个进程的函数调用关系打印出来,于是假设一个进程深睡眠了,大家就可以得知它到底死在什么地方了。

3.               定位工具使用方法

3.1        准备

使用之前要先加载驱动和设备节点:

insmod getstack.ko

mknod /dev/getstack c 251 0

注:请通过如下命令 cat /proc/devices ,来查看 getstack 在你的板子上所对应的字符设备的Major 号。

3.2        使用方法

我开发了一个应用程序 tool ,大家可以直接调用,方法如下:

./getstack --help

Usage: [options] pid/name

Options:

  -p |--pid          Print the stack of process with process's id.

  -n |--name      Print the stack of process with process's name.

  -a |--all           Print the stack of all processes.

  --daemon         Fork and go into the background.

  --quiet             Do not print anything.

  --help              Print this help message.

如果要看某个进程当前的内核函数调用堆栈,可以用 ps 得到其 pid ,然后调用 ./getstack -p pid 即可。

如果要查看所有进程的内核函数调用堆栈,调用 ./getstack -a

-n 我试了一下还不行,当前还没有跟踪原因,因为 -n 可以被 -p 取代,所以我也不想再跟踪了。

4.               原理分析

4.1        定位工具tool

这个工具的原理很简单,就是调用驱动的相应接口,辅助的代码都被我省掉了:

……

 

/*
 * Close the descriptor stored in gs_fd (declared in the part of the tool
 * the article omits) and mark it as closed by resetting it to -1.
 *
 * Prints a diagnostic and returns without doing anything when gs_fd is
 * negative, i.e. was never opened or has already been finalized.
 */
void getstack_finalize(void)
{
    int gs_result;

    if (gs_fd < 0) {
        printf("fd is not opened!\n");
        return;
    }

    gs_result = close(gs_fd);
    printf("getstack_finalize want to close fd: %d, and result is: %d\n", gs_fd, gs_result);

    /* Invalidate the handle so a second call takes the early-return path. */
    gs_fd = -1;
}

 

/*
 * Ask the driver behind gs_fd to print the stack of every task
 * (PRINT_ALL_TASK ioctl, no argument).
 *
 * Returns 0 on success, -1 if the ioctl fails.
 */
int getstack_showall(void)
{
    if (ioctl(gs_fd, PRINT_ALL_TASK, NULL) < 0) {
        /* Name the command that failed; the three wrappers used to print the same text. */
        printf("ioctl PRINT_ALL_TASK\n");
        return -1;
    }

    return 0;
}

 

/*
 * Ask the driver behind gs_fd to print the stack of the process whose id
 * is pid (PRINT_PID_TASK ioctl; the driver receives a pointer to pid).
 *
 * Returns 0 on success, -1 if the ioctl fails.
 */
int getstack_pid(int  pid)
{
    if (ioctl(gs_fd, PRINT_PID_TASK, &pid) < 0) {
        /* Name the command that failed; the three wrappers used to print the same text. */
        printf("ioctl PRINT_PID_TASK\n");
        return -1;
    }

    return 0;
}

 

/*
 * Ask the driver behind gs_fd to print the stack of every task whose
 * command name equals pname (PRINT_PNAME_TASK ioctl; the driver receives
 * the string pointer itself).
 *
 * Returns 0 on success, -1 if the ioctl fails.
 */
int getstack_pname(char *  pname)
{
    if (ioctl(gs_fd, PRINT_PNAME_TASK, pname) < 0) {
        /* Name the command that failed; the three wrappers used to print the same text. */
        printf("ioctl PRINT_PNAME_TASK\n");
        return -1;
    }

    return 0;
}

 

int main(int argc, char *argv[])

{

       ……

       if (daemon)

              daemonize();

       if (quiet)

              console_quiet();

       getstack_init();

       if(pid > 0)

              getstack_pid(pid);

       else if(pid > 0)

              getstack_pname(pname);

       else  if (all)

              getstack_showall();

       getstack_finalize();

}

4.2        驱动

驱动的关键代码如下,其它部分请参考我的完整源代码,在 drivers/char/getstack.c 里:

static void show_task_stack(struct task_struct *p)

{

       unsigned state;

       state = p->state ? __ffs(p->state) + 1 : 0;

       printk("%-13.13s %c", p->comm,

              state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');

#if (BITS_PER_LONG == 32)

       if (state == TASK_RUNNING)

              printk(" running ");

       else

              printk(" %08lX ", thread_saved_pc(p));

#else

       if (state == TASK_RUNNING)

              printk("  running task   ");

       else

              printk(" %016lx ", thread_saved_pc(p));

#endif

       if (state != TASK_RUNNING)

              show_stack(p, NULL);

}

 

static int getstack_ioctl(struct inode *inode, struct file *file,

                  unsigned int cmd, unsigned long arg)

{

       struct task_struct *g, *p;

       int pid;

       char pname[100];

       printk("getstack_ioctl cmd: %0x/n", cmd);

       switch (cmd) {

              case PRINT_ALL_TASK:

              {

                     read_lock(&tasklist_lock);

                     do_each_thread(g, p) {

                            show_task_stack(p);

                            msleep(1000);/* Flush the uart buffer */

                     } while_each_thread(g, p);

                     read_unlock(&tasklist_lock);

                     return 0;

              }

              case PRINT_PID_TASK:

              {

                     if (copy_from_user(&pid, (int *)arg,  sizeof(int)))

                            return -EFAULT;

                     read_lock(&tasklist_lock);

                     do_each_thread(g, p) {

                            if(p->tgid == pid)

                                   show_task_stack(p);

                     } while_each_thread(g, p);

                     read_unlock(&tasklist_lock);

                     return 0;

              }

              case PRINT_PNAME_TASK:

              {

                     if (strncpy_from_user(pname, (char *)arg,  strlen_user((char *)arg)))

                            return -EFAULT;

                     read_lock(&tasklist_lock);

                     do_each_thread(g, p) {

                            if(!strcmp(p->comm, pname))

                                   show_task_stack(p);

                     } while_each_thread(g, p);

                     read_unlock(&tasklist_lock);

                     return 0;

              }

              default:

                     printk (  "getstack_ioctl:  cmd=0x%x/n",        cmd);

                     return -EINVAL;

       }

 

       /* Keep the compiler happy */

       return 0;

}

4.3        内核原理

内核的原理其实和应用程序的 back trace 也是一样的,首先是取出该进程的内核栈的栈顶位置,然后把堆栈向上到栈底的地址和符号表比较,比上了就认为是一个函数;同时,因为 ARM 提供了 fp 寄存器,故而不用把所有的地址全都拿来比较,而只要把堆栈中的 fp 链拿过来比较就可以了:

ENTRY(c_backtrace)

 

#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)

              mov       pc, lr

#else

 

              stmfd     sp!, {r4 - r8, lr}   @ Save an extra register so we have a location...

              tst   r1, #0x10             @ 26 or 32-bit?

              moveq   mask, #0xfc000003

              movne   mask, #0

              tst   mask, r0 /*r0 是要开始 back trace  fp,mask 是结束 fp 的标志 */

              movne   r0, #0

              movs      frame, r0  /* 从现在开始, frame 里面存的就是当前的 fp  */

1:           moveq   r0, #-2

              ldmeqfd       sp!, {r4 - r8, pc}/* 如果已经到最后一个 frame 了,就返回了 */

 

2:           stmfd     sp!, {pc}              @ calculate offset of PC in STMIA instruction

              ldr   r0, [sp], #4

              adr  r1, 2b - 4

              sub  offset, r0, r1/* 因为每个 ARM CPU  pc 寄存器和当前指令的偏移不一样,故而这里通过这段运算计算出来 */

 

3:           tst   frame, mask         @ Check for address exceptions...

              bne 1b

 

1001:            ldr   next, [frame, #-12]     @ get fp /* 这是上一个 fp 的位置,即调用此函数的函数内的 fp*/

1002:            ldr   r2, [frame, #-4]    @ get lr

1003:            ldr   r3, [frame, #0]            @ get pc /* 这是 pc 的位置 */

              sub  save, r3, offset    @ Correct PC for prefetching /* 这样就计算出来函数实际对应的 pc 的位置了 */

              bic  save, save, mask

1004:            ldr   r1, [save, #0]              @ get instruction at function

              mov       r1, r1, lsr #10

              ldr   r3, .Ldsi+4

              teq  r1, r3

              subeq     save, save, #4

              mov       r0, save

              bic  r1, r2, mask

              bl    dump_backtrace_entry/* 这是一个 C 程序函数,传入的参数,一个是函数的地址,即上面计算出来的pc ;另一个是 lr ,即返回地址;这个函数我们就不深入解析了,它的主要功能就是在内核符号表里面寻找到和传入的地址符合的符号,请注意内核本身也对应一个 module 符号表 */

 

              ldr   r0, [frame, #-8]    @ get sp

              sub  r0, r0, #4

1005:            ldr   r1, [save, #4]              @ get instruction at function+4

              mov       r3, r1, lsr #10

              ldr   r2, .Ldsi+4 /*Ldsi 的作用我没看明白,看似是处理异常部分,使得此调用能够恢复过来,并返回 */

              teq  r3, r2                    @ Check for stmia sp!, {args}

              addeq    save, save, #4             @ next instruction

              bleq       .Ldumpstm

 

              sub  r0, frame, #16

1006:            ldr   r1, [save, #4]              @ Get ‘stmia sp!, {rlist, fp, ip, lr, pc}’ instruction

              mov       r3, r1, lsr #10

              ldr   r2, .Ldsi

              teq  r3, r2

              bleq       .Ldumpstm

 

              /*

                * A zero next framepointer means we’re done.

                */

              teq  next, #0

              ldmeqfd       sp!, {r4 - r8, pc}/* 处理完了,返回 */

 

              /*

                * The next framepointer must be above the

                * current framepointer.

                */

              cmp       next, frame

              mov       frame, next /* 处理下一个 Frame*/

              bhi  3b

              b     1007f

5.               附录

5.1        T 卡拷贝DW 时的堆栈打印信息

如下是我在 T 卡拷贝 DW 时抓下的信息,因为后来系统可以恢复,我不确定是否和我们上周跟踪的问题属于同一个现象:

1449 root       1980 DW  cp media media-2 -af

 

~/test_case # ./getstack -p 1449

[ 3587.220000] getstack_open entered

[ 3587.230000] getstack_ioctl cmd: 7390

[ 3587.230000] show_task_stack entered

[ 3587.230000] cp            D C022F3F8 [<c022f0a0>] (schedule+0x0/0x6e0) from [<c0230218>] (io_schedule+0x34/0x58)

[ 3587.240000] [<c02301e4>] (io_schedule+0x0/0x58) from [<c00e50e0>] (sync_buffer+0x4c/0x54)

[ 3587.250000]  r4 = C6EE795C

[ 3587.250000] [<c00e5094>] (sync_buffer+0x0/0x54) from [<c023072c>] (__wait_on_bit+0x9c/0xc0)

[ 3587.260000] [<c0230690>] (__wait_on_bit+0x0/0xc0) from [<c02307c8>] (out_of_line_wait_on_bit+0x78/0x84)

[ 3587.270000] [<c0230750>] (out_of_line_wait_on_bit+0x0/0x84) from [<c00e500c>] (__wait_on_buffer+0x28/0x30)

[ 3587.280000]  r7 = 00000000  r6 = C6EE7A28  r5 = 00000723  r4 = C09E14A8

[ 3587.290000] [<c00e4fe4>] (__wait_on_buffer+0x0/0x30) from [<c00e6c54>] (__bread+0xc4/0xfc)

[ 3587.300000] [<c00e6b90>] (__bread+0x0/0xfc) from [<bf0b6370>] (fat_ent_bread+0x54/0xb8 [fat])

[ 3587.310000]  r5 = 00000723  r4 = C7C5D200

[ 3587.310000] [<bf0b631c>] (fat_ent_bread+0x0/0xb8 [fat]) from [<bf0b5a74>] (fat_ent_read+0x150/0x1cc [fat])

[ 3587.320000]  r8 = C7C5D200  r7 = 00037F80  r6 = C6EE7A28  r5 = BF0BD9C8

[ 3587.330000]  r4 = 00000000

[ 3587.330000] [<bf0b5924>] (fat_ent_read+0x0/0x1cc [fat]) from [<bf0b23d4>] (fat_get_cluster+0x1b4/0x2f0 [fat])

[ 3587.340000] [<bf0b2220>] (fat_get_cluster+0x0/0x2f0 [fat]) from [<bf0b260c>] (fat_bmap+0xfc/0x18c [fat])

[ 3587.350000] [<bf0b2510>] (fat_bmap+0x0/0x18c [fat]) from [<bf0b78c8>] (fat_get_block+0x58/0x298 [fat])

[ 3587.360000] [<bf0b7870>] (fat_get_block+0x0/0x298 [fat]) from [<c00ed848>] (do_mpage_readpage+0x414/0x66c)

[ 3587.370000] [<c00ed434>] (do_mpage_readpage+0x0/0x66c) from [<c00ee17c>] (mpage_readpages+0x78/0x164)

[ 3587.380000] [<c00ee104>] (mpage_readpages+0x0/0x164) from [<bf0b7b44>] (fat_readpages+0x20/0x28 [fat])

[ 3587.390000] [<bf0b7b24>] (fat_readpages+0x0/0x28 [fat]) from [<c00a43bc>] (__do_page_cache_readahead+0x21c/0x348)

[ 3587.400000] [<c00a41a0>] (__do_page_cache_readahead+0x0/0x348) from [<c00a455c>] (blockable_page_cache_readahead+0x74/0xd4)

[ 3587.410000] [<c00a44e8>] (blockable_page_cache_readahead+0x0/0xd4) from [<c00a4644>] (make_ahead_window+0x88/0xb8)

[ 3587.420000]  r8 = C0DDC158  r7 = 00000000  r6 = C33F8DC0  r5 = 00000000

[ 3587.420000]  r4 = C6EE7DD8

[ 3587.430000] [<c00a45bc>] (make_ahead_window+0x0/0xb8) from [<c00a472c>] (page_cache_readahead+0xb8/0x1ec)

[ 3587.440000]  r6 = 00000020  r5 = 00000002  r4 = C6EE7DD8

[ 3587.440000] [<c00a4674>] (page_cache_readahead+0x0/0x1ec) from [<c009d168>] (do_generic_mapping_read+0x584/0x5c0)

[ 3587.450000] [<c009cbe4>] (do_generic_mapping_read+0x0/0x5c0) from [<c009f6b8>] (generic_file_aio_read+0x110/0x214)

[ 3587.460000] [<c009f5a8>] (generic_file_aio_read+0x0/0x214) from [<c00bd758>] (do_sync_read+0xc4/0x108)

[ 3587.470000] [<c00bd694>] (do_sync_read+0x0/0x108) from [<c00be1a4>] (vfs_read+0xdc/0x178)

[ 3587.480000]  r8 = C0059F24  r7 = 00002000  r6 = C6EE7F78  r5 = BEBD24F0

[ 3587.490000]  r4 = C33F8DC0

[ 3587.490000] [<c00be0c8>] (vfs_read+0x0/0x178) from [<c00be628>] (sys_read+0x44/0x74)

[ 3587.500000]  r7 = 00000000  r6 = 0153C000  r5 = FFFFFFF7  r4 = C33F8DC0

[ 3587.500000] [<c00be5e4>] (sys_read+0x0/0x74) from [<c0059da0>] (ret_fast_syscall+0x0/0x2c)

[ 3587.510000]  r7 = 00000003  r6 = 00000004  r5 = BEBD24F0  r4 = 00002000

5.2        用PS 命令看在哪个函数里

用如下的命令也可以查看睡眠的进程在哪个函数里,其处理其实也是根据 /proc/kallsyms 来处理的。调用方法如下:

/ps -o pid,tty,addr,wchan,fname -p  1498

但是此命令只能看一层,其实意义不大,比如说如上命令看到的是:

  PID TT       ADDR WCHAN  COMMAND

  1498 ?           - select wpa_supp

同时, busybox 里的 ps 不支持这些参数,要下载 PC 上的 ps 并编译运行, PC 上的 PS 可以从如下网址下载:

http://procps.sourceforge.net/

编译后将如下两个文件拷贝到设备上:

cp proc/libproc-3.2.8.so /usr/lib

cp ps/ps /usr/bin/

6.      附录:驱动源代码

/*
 * Author: Huang Gao
 * huanggao@gmail.com
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/cdev.h>
#include <linux/delay.h>
#include <asm/io.h>
#include <asm/system.h>
#include <asm/uaccess.h>

/* Device state: just the character-device object registered with the kernel. */
struct getstack_dev
{
    struct cdev cdev;
};

/*
 * The single device instance. File-local so that a name as generic as
 * "dev" is not exported into the kernel's global symbol namespace.
 */
static struct getstack_dev dev;

/* Major number obtained in getstack_init(); -1 until then. */
static int getstack_major = -1;

/*
 * ioctl command numbers, built from the magic 's' plus a sequence number.
 * They carry no direction/size encoding (_IO) even though two of them
 * take a user pointer; the values are left unchanged so that existing
 * builds of the user-space tool keep working.
 */
#define GETSTACK_IOCTL        's'
#define PRINT_ALL_TASK        _IO(GETSTACK_IOCTL, 0x8b)
#define PRINT_PID_TASK        _IO(GETSTACK_IOCTL, 0x90)
#define PRINT_PNAME_TASK    _IO(GETSTACK_IOCTL, 0x91)

/*
 * read() handler: only logs that it was entered. No data is copied to
 * buf and 0 is always returned.
 */
static ssize_t getstack_read (struct file * file, char __user * buf, size_t count,
            loff_t * ppos)
{
    printk("getstack_read entered\n");
    return 0;
}

/*
 * write() handler: only logs that it was entered. Nothing is read from
 * buf and 0 is always returned.
 */
static ssize_t getstack_write (struct file * file, const char __user * buf,
             size_t count, loff_t * ppos)
{
    printk("getstack_write entered\n");
    return 0;
}

/* One-letter state codes, indexed by the value computed in show_task_stack(). */
static const char stat_nam[] = "RSDTtZX";

/*
 * Print one summary for task p: its command name, a one-letter state
 * code from stat_nam ('?' when the index is out of range), and then
 * either a "running" marker or the saved PC followed by the stack dump.
 */
static void show_task_stack(struct task_struct *p)
{
    unsigned state;
    printk("show_task_stack entered\n");

    /* Index of the lowest set state bit, shifted by one; 0 means no bit set. */
    state = p->state ? __ffs(p->state) + 1 : 0;
    printk("%-13.13s %c", p->comm,
        state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
#if (BITS_PER_LONG == 32)
    if (state == TASK_RUNNING)
        printk(" running ");
    else
        printk(" %08lX ", thread_saved_pc(p));
#else
    if (state == TASK_RUNNING)
        printk("  running task   ");
    else
        printk(" %016lx ", thread_saved_pc(p));
#endif
    /* The stack is dumped only for tasks that are not running. */
    if (state != TASK_RUNNING)
        show_stack(p, NULL);
}

static int getstack_ioctl(struct inode *inode, struct file *file,
            unsigned int cmd, unsigned long arg)
{
    struct task_struct *g, *p;
    int pid; 
    char pname[100];
    printk("getstack_ioctl cmd: %0x/n", cmd);
    switch (cmd) {
        case PRINT_ALL_TASK:
        {
            read_lock(&tasklist_lock);
            do_each_thread(g, p) {
                show_task_stack(p);
                msleep(1000);/* Flush the uart buffer */
            } while_each_thread(g, p);
            read_unlock(&tasklist_lock);
            return 0;
        }
        case PRINT_PID_TASK:
        {
            if (copy_from_user(&pid, (int *)arg,  sizeof(int)))
                return -EFAULT;
            read_lock(&tasklist_lock);
            do_each_thread(g, p) {
                if(p->tgid == pid)
                    show_task_stack(p);
            } while_each_thread(g, p);
            read_unlock(&tasklist_lock);
            return 0;
        }
        case PRINT_PNAME_TASK:
        {
            if (strncpy_from_user(pname, (char *)arg,  strlen_user((char *)arg)))
                return -EFAULT;
            read_lock(&tasklist_lock);
            do_each_thread(g, p) {
                if(!strcmp(p->comm, pname))
                    show_task_stack(p);
            } while_each_thread(g, p);
            read_unlock(&tasklist_lock);
            return 0;
        }
        default:
            printk (  "getstack_ioctl:  cmd=0x%x/n",     cmd);
            return -EINVAL;
    }

    /* Keep the compiler happy */
    return 0;
}

/* open() handler: logs the call and always succeeds; no per-open state is set up. */
static int getstack_open (struct inode * inode, struct file * file)
{
    printk("getstack_open entered\n");
    return 0;
}

/* release() handler: logs the call and always succeeds; there is nothing to tear down. */
static int getstack_release (struct inode * inode, struct file * file)
{
    printk("getstack_release entered\n");
    return 0;
}

static const struct file_operations getstack_fops = {
    .owner        = THIS_MODULE,
    .read        = getstack_read,
    .write        = getstack_write,
    .ioctl        = getstack_ioctl,
    .open        = getstack_open,
    .release    = getstack_release,
};

/*
 * Module init: reserve one dynamically numbered char-device region named
 * "getstack", remember its major in getstack_major and register the cdev
 * with getstack_fops.
 *
 * Returns 0 on success or the negative error from alloc_chrdev_region()
 * or cdev_add(); on a cdev_add() failure the region is released again so
 * nothing is left registered.
 */
static int __init getstack_init (void)
{
    dev_t devno;
    int err;

    printk("getstack_init entered\n");

    err = alloc_chrdev_region(&devno, 0, 1, "getstack");
    if (err < 0)
        return err;

    /* devno is only valid once the allocation has succeeded. */
    getstack_major = MAJOR(devno);

    cdev_init(&dev.cdev, &getstack_fops);
    dev.cdev.owner = THIS_MODULE;

    err = cdev_add(&dev.cdev, devno, 1);
    if (err) {
        printk(KERN_NOTICE "Error %d adding getstack cdev\n", err);
        unregister_chrdev_region(devno, 1);
        getstack_major = -1;
        return err;
    }

    return 0;
}

/*
 * Module exit: remove the cdev and give back the device-number region
 * that getstack_init() reserved (major getstack_major, minor 0).
 */
static void __exit getstack_cleanup (void)
{
    printk("getstack_cleanup entered\n");
    cdev_del(&dev.cdev);
    unregister_chrdev_region(MKDEV(getstack_major, 0), 1);
}

/* Entry points run when the module is loaded (insmod) and unloaded. */
module_init(getstack_init);
module_exit(getstack_cleanup);

/* Module metadata. */
MODULE_AUTHOR("Huang Gao <huanggao@gmail.com>");
MODULE_LICENSE("GPL");


7.               References

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值