linux 应用调试

本文介绍了Linux应用程序调试的重要性,包括异常退出、卡死无响应、响应值错误等问题的解决方案,如使用coredump分析、strace跟踪系统调用及日志跟踪等方法。
摘要由CSDN通过智能技术生成

1. 背景

linux系统广泛应用于服务器端和客户端(如android), 因而大部分程序都是linux应用程序;

应用程序的生命周期包括: 需求分析, 概要设计, 编码实现, 调试维护. 其中第四阶段的耗时最长, 因而熟悉linux的调试至关重要.

2. 为何调试和维护?

程序运行行为不符合预期, 或者在完善过程中引入新需求而产生意想不到的问题.
意料之外的行为按严重程度分: 异常退出, 卡死无响应, 响应值不对, 无法长时间运行, 响应慢.

3. 解决方案

下面对每类行为提出相应的解决方案:
1) 异常退出: 产生的原因有内存非法地址访问, 权限不足, 内存泄露被杀, 逻辑错误退出;
解决方法, 即生成crash时的黑盒子coredump(应用进程的内存镜像),然后可从该文件分析崩溃时的线程调用栈和各栈中变量值; 
权限异常和逻辑错误需要跟踪打印调用返回值;
2) 卡死无响应: 产生的原因有等待锁,等待系统调用返回, 死循环;
解决方法: 手动触发coredump分析进程调用栈, strace跟踪系统调用耗时
3) 响应值不对: 逻辑错误,可printf日志跟踪, dumpstack打印调用栈;
4) 无法长时间运行: printf日志跟踪, 分析coredump, 监控内存泄露;
5) 响应慢: 可printf日志跟踪, dumpstack打印调用栈,strace跟踪系统调用耗时, profile日志分析

4. 代码示例

下面给出一个示例程序, 包含几种技术(printf, backtrace, coredump, strace, valgrind);
#include <execinfo.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * Obtain a backtrace of the calling thread and print it to stdout.
 *
 * At most 10 frames are captured with backtrace() and converted to text
 * with backtrace_symbols(). Frame 0 is print_trace() itself, so it is
 * skipped: the reported count is (captured frames - 1) and printing starts
 * at index 1.
 *
 * If backtrace_symbols() fails, the error is reported with perror() and the
 * process exits with EXIT_FAILURE.
 */
void print_trace (void)
{
    void *array[10];
    char **strings;
    int size;
    int i;

    /* Capacity is derived from the array so the two cannot drift apart. */
    size = backtrace(array, (int)(sizeof array / sizeof array[0]));
    strings = backtrace_symbols(array, size);
    if (NULL == strings)
    {
        /* Label now matches the function that actually failed. */
        perror("backtrace_symbols");
        exit(EXIT_FAILURE);
    }

    printf ("Obtained %d stack frames.\n", (size - 1));

    /* Start at 1 to leave print_trace's own frame out of the listing. */
    for (i = 1; i < size; i++)
    {
        printf ("%s\n", strings[i]);
    }

    /* backtrace_symbols() returns one malloc'ed block; a single free suffices. */
    free (strings);
    strings = NULL;
}

/*
 * Loop forever, simulating a process that hangs and leaks memory.
 *
 * Each iteration allocates 1024 bytes that are never freed, prints the
 * process id, thread id and the iteration counter, then sleeps for a number
 * of seconds that grows by one per iteration (1, 2, 3, ...).
 * This function never returns.
 */
void blocked_exp(void){
    void* leaked = NULL;
    int round = 0;

    for (;;) {
        /* The previous pointer is overwritten without free(): one leak per pass. */
        leaked = malloc(1024);
        printf("pid=%d, tid=%d, %s/block_num=%d\n",
                (int)getpid(), (int)pthread_self(),
                __FUNCTION__, round);
        /* Increment first so the first sleep lasts 1 second, not 0. */
        sleep(++round);
    }
}

/*
 * Log the call site and the pointer value, print the current call stack,
 * then enter blocked_exp().
 *
 * p: pointer whose target is read and returned once blocked_exp() returns.
 *
 * Returns the int that p points to. As written in this file, blocked_exp()
 * loops forever, so the dereference below is only reached if that changes;
 * main() passes NULL, in which case the dereference would fault.
 */
int func(int *p)
{
    /* %p requires a void * argument, hence the explicit cast. */
    printf("file[%s:%d]: func[%s]/ p=%p\n",
            __FILE__, __LINE__, __FUNCTION__, (void *)p);
    print_trace();

    blocked_exp();

    int y = *p;
    return y;
}
/*
 * Entry point of the demo: prints the source location, process id, thread
 * id and the (NULL) pointer value, then hands the NULL pointer to func()
 * and returns whatever func() returns.
 */
int main(void)
{
    int *p = NULL;

    /* %p requires a void * argument, hence the explicit cast. */
    printf("file[%s:%d](pid=%d tid=%d): func[%s]/ p=%p\n",
            __FILE__, __LINE__,
            (int)getpid(), (int)pthread_self(),
            __FUNCTION__, (void *)p);
    return func(p);
}

5. 调试步骤

编译:
gcc -g -rdynamic -o app.bin app.c
设置coredump文件大小(KB):
ulimit -c 102400
测试执行:
$ ./app.bin 
file[app.c:56](pid=27594 tid=0): func[main]/ p=(nil)
file[app.c:44]: func[func]/ p=(nil)
Obtained 4 stack frames.
./app.bin(func+0x36) [0x400be4]
./app.bin(main+0x5a) [0x400c51]
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf5) [0x7ffa8a1c3ec5]
./app.bin() [0x4009e9]
pid=27594, tid=0, blocked_exp/block_num=0
pid=27594, tid=0, blocked_exp/block_num=1
pid=27594, tid=0, blocked_exp/block_num=2
^C
手动触发coredump:
kill -6 $(pidof app.bin)    # 信号6即SIGABRT, 进程收到后异常终止并生成core文件
加载core文件: 用where查看崩溃或卡死点, 用l查看源码位置, 用p查看局部变量值;
gdb app.bin core
(gdb) where
#0  0x00007fdc0e9b2ab0 in __nanosleep_nocancel () at ../sysdeps/unix/syscall-template.S:81
#1  0x00007fdc0e9b2964 in __sleep (seconds=0) at ../sysdeps/unix/sysv/linux/sleep.c:137
#2  0x0000000000400bac in blocked_exp () at app.c:37
#3  0x0000000000400be9 in func (p=0x0) at app.c:47
#4  0x0000000000400c51 in main () at app.c:59
(gdb) l app.c:37
32     while(1) {
33         printf("pid=%d, tid=%d, %s/block_num=%d\n", 
34                 (int)getpid(), (int)pthread_self(), 
35                 __FUNCTION__, block_num);
36         block_num++;
37         sleep(block_num);
38     }
39 }
40 
41 int func(int *p)
(gdb) f 2
#2  0x0000000000400bac in blocked_exp () at app.c:37
37         sleep(block_num);
(gdb) p block_num
$1 = 4
(gdb)q

使用strace跟踪系统调用耗时:
$ strace -T  -f ./app.bin
execve("./app.bin", ["./app.bin"], [/* 90 vars */]) = 0 <0.000096>
brk(0)                                  = 0x1fca000 <0.000005>
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory) <0.000013>
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f43f41f5000 <0.000015>
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory) <0.000013>
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 <0.000017>
fstat(3, {st_mode=S_IFREG|0644, st_size=144023, ...}) = 0 <0.000009>
mmap(NULL, 144023, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f43f41d1000 <0.000010>
close(3)                                = 0 <0.000008>
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory) <0.000009>
open("/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 <0.000015>
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320\37\2\0\0\0\0\0"..., 832) = 832 <0.000010>
fstat(3, {st_mode=S_IFREG|0755, st_size=1845024, ...}) = 0 <0.000009>
mmap(NULL, 3953344, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f43f3c0f000 <0.000012>

valgrind检查内存泄露:
$ valgrind ./app.bin 
==29115== Memcheck, a memory error detector
==29115== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
==29115== Using Valgrind-3.10.0.SVN and LibVEX; rerun with -h for copyright info
==29115== Command: ./app.bin
==29115== 
file[app.c:58](pid=29115 tid=0): func[main]/ p=(nil)
file[app.c:46]: func[func]/ p=(nil)
Obtained 4 stack frames.
./app.bin(func+0x36) [0x400c3a]
./app.bin(main+0x5a) [0x400ca7]
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf5) [0x4e58ec5]
./app.bin() [0x400a29]
pid=29115, tid=0, blocked_exp/block_num=0
pid=29115, tid=0, blocked_exp/block_num=1
pid=29115, tid=0, blocked_exp/block_num=2
pid=29115, tid=0, blocked_exp/block_num=3
pid=29115, tid=0, blocked_exp/block_num=4
pid=29115, tid=0, blocked_exp/block_num=5
pid=29115, tid=0, blocked_exp/block_num=6
^C==29115== 
==29115== HEAP SUMMARY:
==29115==     in use at exit: 7,168 bytes in 7 blocks
==29115==   total heap usage: 13 allocs, 6 frees, 9,196 bytes allocated
==29115== 
==29115== LEAK SUMMARY:
==29115==    definitely lost: 6,144 bytes in 6 blocks
==29115==    indirectly lost: 0 bytes in 0 blocks
==29115==      possibly lost: 0 bytes in 0 blocks
==29115==    still reachable: 1,024 bytes in 1 blocks
==29115==         suppressed: 0 bytes in 0 blocks
==29115== Rerun with --leak-check=full to see details of leaked memory
==29115== 
==29115== For counts of detected and suppressed errors, rerun with: -v
==29115== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

在线调试,可断点、单步跟踪调试:
sudo gdb -p $pid
(gdb) bt
#0  0x00007ffe04b5fab0 in __nanosleep_nocancel () at ../sysdeps/unix/syscall-template.S:81
#1  0x00007ffe04b5f964 in __sleep (seconds=0) at ../sysdeps/unix/sysv/linux/sleep.c:137
#2  0x0000000000400c02 in blocked_exp () at app.c:39
#3  0x0000000000400c3f in func (p=0x0) at app.c:49
#4  0x0000000000400ca7 in main () at app.c:61
(gdb) c
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值