在本文中, 我们来介绍一下strace命令的一些用法。strace命令主要用来跟踪程序执行过程中的系统调用和收到的信号, 它有很多参数, 功能很强大, 本文不打算逐一介绍, 只介绍如何用strace来定位core dump问题。 我们先看如下程序(test.c文件):
#include <stdio.h>
/* Exchange the two int values that px and py point to. */
void swap(int *px, int *py)
{
int tmp = *px; /* keep the value at px before it is overwritten */
*px = *py;
*py = tmp;
}
/*
 * Demo program: prints a, b and c, swaps a and b, prints them again, and
 * then writes through a null pointer. The null write is deliberate; it is
 * the crash this example sets out to locate.
 */
int main()
{
int a = 1;
int b = 2;
int c = a + b; /* computed before the swap, so it is printed unchanged both times */
printf("%d, %d, %d\n", a, b, c);
swap(&a,& b);
printf("%d, %d, %d\n", a, b, c);
int *p = NULL;
*p = 0; /* intentional null-pointer write that triggers the crash under study */
return 0;
}
熟悉C/C++的朋友一眼就能看出程序会core dump, 我们来具体看看:
[taoge@localhost test]$ ls
test.c
[taoge@localhost test]$ ulimit -c
0
[taoge@localhost test]$ gcc -g test.c
[taoge@localhost test]$ ls
a.out test.c
[taoge@localhost test]$ ./a.out
1, 2, 3
2, 1, 3
Segmentation fault (core dumped)
[taoge@localhost test]$ ls
a.out test.c
[taoge@localhost test]$
[taoge@localhost test]$
[taoge@localhost test]$
[taoge@localhost test]$ strace ./a.out
execve("./a.out", ["./a.out"], [/* 22 vars */]) = 0
brk(0) = 0x92db000
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb784a000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY) = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=49072, ...}) = 0
mmap2(NULL, 49072, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb783e000
close(3) = 0
open("/lib/libc.so.6", O_RDONLY) = 3
read(3, "\177ELF\1\1\1\3\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0 N\211\0004\0\0\0"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=1855584, ...}) = 0
mmap2(0x87e000, 1620360, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x87e000
mprotect(0xa03000, 4096, PROT_NONE) = 0
mmap2(0xa04000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x185) = 0xa04000
mmap2(0xa07000, 10632, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xa07000
close(3) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb783d000
set_thread_area({entry_number:-1 -> 6, base_addr:0xb783d6c0, limit:1048575, seg_32bit:1, contents:0, read_exec_only:0, limit_in_pages:1, seg_not_present:0, useable:1}) = 0
mprotect(0xa04000, 8192, PROT_READ) = 0
mprotect(0x876000, 4096, PROT_READ) = 0
munmap(0xb783e000, 49072) = 0
fstat64(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7849000
write(1, "1, 2, 3\n", 81, 2, 3
) = 8
write(1, "2, 1, 3\n", 82, 1, 3
) = 8
--- SIGSEGV (Segmentation fault) @ 0 (0) ---
+++ killed by SIGSEGV (core dumped) +++
Segmentation fault (core dumped)
[taoge@localhost test]$
可以看到, ulimit -c 的值为0, 也就是说, 程序即使core dump也不会产生core文件, 执行./a.out后, 果然如此, 没有生成core文件。 继续执行strace ./a.out命令(不能写成strace a.out, 因为当前目录通常不在PATH中, strace会找不到a.out)后, 就能看到系统调用, 但是, 这对定位core dump问题似乎没有什么用。
那怎么办呢?这就得介绍一下strace -i ./a.out的用法。 加上-i选项后, strace会在每一行前面打印出该系统调用(或信号)发生时的指令地址, 如下:
[taoge@localhost test]$ strace -i ./a.out
[00ff4424] execve("./a.out", ["./a.out"], [/* 22 vars */]) = 0
[0086e2fd] brk(0) = 0x818e000
[0086f6d3] mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb771c000
[0086f5d1] access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
[0086f494] open("/etc/ld.so.cache", O_RDONLY) = 3
[0086f45e] fstat64(3, {st_mode=S_IFREG|0644, st_size=49072, ...}) = 0
[0086f6d3] mmap2(NULL, 49072, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7710000
[0086f4cd] close(3) = 0
[0086f494] open("/lib/libc.so.6", O_RDONLY) = 3
[0086f514] read(3, "\177ELF\1\1\1\3\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0 N\211\0004\0\0\0"..., 512) = 512
[0086f45e] fstat64(3, {st_mode=S_IFREG|0755, st_size=1855584, ...}) = 0
[0086f6d3] mmap2(0x87e000, 1620360, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x87e000
[0086f754] mprotect(0xa03000, 4096, PROT_NONE) = 0
[0086f6d3] mmap2(0xa04000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x185) = 0xa04000
[0086f6d3] mmap2(0xa07000, 10632, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xa07000
[0086f4cd] close(3) = 0
[0086f6d3] mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb770f000
[0085a552] set_thread_area({entry_number:-1 -> 6, base_addr:0xb770f6c0, limit:1048575, seg_32bit:1, contents:0, read_exec_only:0, limit_in_pages:1, seg_not_present:0, useable:1}) = 0
[0086f754] mprotect(0xa04000, 8192, PROT_READ) = 0
[0086f754] mprotect(0x876000, 4096, PROT_READ) = 0
[0086f711] munmap(0xb7710000, 49072) = 0
[00ba1424] fstat64(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
[00ba1424] mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb771b000
[00ba1424] write(1, "1, 2, 3\n", 81, 2, 3
) = 8
[00ba1424] write(1, "2, 1, 3\n", 82, 1, 3
) = 8
[08048479] --- SIGSEGV (Segmentation fault) @ 0 (0) ---
[????????] +++ killed by SIGSEGV (core dumped) +++
Segmentation fault (core dumped)
[taoge@localhost test]$
[taoge@localhost test]$
[taoge@localhost test]$
[taoge@localhost test]$ addr2line -e a.out 08048479
/home/taoge/test/test.c:21
[taoge@localhost test]$
[taoge@localhost test]$
[taoge@localhost test]$
[taoge@localhost test]$ cat test.c -n
1 #include <stdio.h>
2
3 void swap(int *px, int *py)
4 {
5 int tmp = *px;
6 *px = *py;
7 *py = tmp;
8 }
9
10 int main()
11 {
12 int a = 1;
13 int b = 2;
14 int c = a + b;
15 printf("%d, %d, %d\n", a, b, c);
16
17 swap(&a,& b);
18 printf("%d, %d, %d\n", a, b, c);
19
20 int *p = NULL;
21 *p = 0;
22
23 return 0;
24 }
[taoge@localhost test]$
通过strace -i ./a.out定位出core dump的地址, 然后通过addr2line来定位出对应的代码行。
但是, 上述过程有个问题, 它依赖于重新执行a.out, 从本质上讲, 实际上就类似于gdb中的r命令, 对于一些概率性的core dump问题, strace无能为力, 那还是用之前介绍过的dmesg + addr2line吧。
OK, 本文先介绍到这里, 后面会继续介绍strace命令的其他用途。 最后, 我们来验证一下strace -i ./a.out确实会重新执行a.out: 下面的程序每运行一次就会向data.txt追加一行hello, 因此每执行一次strace -i ./a.out, data.txt就会多出一行。 程序如下:
#include <stdio.h>
/* Exchange the two int values that px and py point to. */
void swap(int *px, int *py)
{
    /* Read both values first, then store each one in the other slot. */
    const int first = *px;
    const int second = *py;

    *px = second;
    *py = first;
}
/*
 * Demo program: same flow as a print / swap / print / crash sequence, but it
 * also appends one "hello" line to data.txt on every run, so the file shows
 * how many times the program has actually been executed.
 *
 * Returns 1 if data.txt cannot be opened. Otherwise it does not return
 * normally: the null-pointer write near the end is deliberate.
 */
int main()
{
    int a = 1;
    int b = 2;
    int c = a + b; /* computed before the swap, so it is printed unchanged both times */
    printf("%d, %d, %d\n", a, b, c);

    /*
     * Check the open result: passing a null stream to fprintf would crash
     * here, in the wrong place, instead of at the intended null write below.
     */
    FILE *fp = fopen("data.txt", "a");
    if (fp == NULL) {
        perror("data.txt");
        return 1;
    }
    fprintf(fp, "hello\n");
    /* fclose flushes the buffered line, so a write failure would surface here. */
    if (fclose(fp) != 0) {
        perror("data.txt");
    }

    swap(&a, &b);
    printf("%d, %d, %d\n", a, b, c);

    int *p = NULL;
    *p = 0; /* intentional null-pointer write that triggers the crash under study */
    return 0;
}
OK, 不多说。