最近几个客户在使用公司的产品时出现了严重的内存泄漏,导致机器服务宕机。在采样了部分数据后,在本地环境通过valgrind并没有发现内存泄漏的点,顿时茫然。在了解了systemtap这个动态追踪工具后,便轻松找到了产品代码中内存泄漏的地方:一个string对象占用了很大一块内存却一直没有释放。
下面通过一个简单的demo来演示string的内存泄漏。
systemtap检查内存泄漏代码
//leaks.stp
// Address of each tracked allocation -> user-space backtrace captured when it was allocated.
global ptr2bt
// Address of each tracked allocation -> size in bytes that was requested.
global ptr2size
// Per-backtrace statistic: +size is added on malloc and -size on free,
// so @sum(bt_stats[bt]) is the number of bytes from that call stack not yet freed.
global bt_stats
// Set to 1 by the timer probe; the malloc return probe prints a report and resets it to 0.
global quit
// Runs once when the script starts: emits a startup notice through warn().
probe begin {
warn("Start tracing. Wait for 10 sec to complete.\n")
}
// Fires each time glibc's __libc_malloc returns; only the process selected with
// `stap -c` / `stap -x` (i.e. target()) is tracked.
//
// 1. If the timer has requested a report (quit != 0), print the three backtraces
//    with the largest outstanding byte totals, then clear the request. The report
//    is printed from this probe rather than from the timer probe itself.
// 2. Record the new allocation: remember its backtrace and size keyed by the
//    returned address, and add the size to that backtrace's running total.
probe process("/usr/lib64/libc.so.6").function("__libc_malloc").return {
    if (pid() == target()) {
        if (quit) {
            printf("\n=================================================================\n")
            // Sort by descending @sum and keep only the top three call stacks.
            foreach (bt in bt_stats @sum- limit 3) {
                print_ustack(bt)
                printf("\t%d\n", @sum(bt_stats[bt]))
            }
            printf("=================================================================\n")
            quit = 0
            //exit()
        }
        //printf("malloc: %p (bytes %d)\n", $return, @entry($bytes))
        ptr = $return
        // A NULL return means the allocation failed; there is nothing to track,
        // and counting its requested size would inflate the leak totals.
        if (ptr != 0) {
            // @entry() makes the entry-time snapshot of the argument explicit, which
            // is what stap's "confusing usage" warning for $bytes in a .return probe asks for.
            bytes = @entry($bytes)
            bt = ubacktrace()
            ptr2bt[ptr] = bt
            ptr2size[ptr] = bytes
            bt_stats[bt] <<< bytes
        }
    }
}
// Fires on entry to glibc's __libc_free in the target process. If the pointer
// being freed was recorded by the malloc probe, forget it and subtract its size
// from the total of the backtrace that allocated it; a backtrace whose total
// drops to zero is removed so it no longer appears in reports.
probe process("/usr/lib64/libc.so.6").function("__libc_free") {
    if (pid() == target()) {
        //printf("free: %p\n", $mem)
        ptr = $mem
        // free(NULL) and pointers this script never saw allocated have no entry;
        // skip them instead of pushing a zero sample into bt_stats[""].
        if (ptr in ptr2bt) {
            bt = ptr2bt[ptr]
            bytes = ptr2size[ptr]
            delete ptr2bt[ptr]
            delete ptr2size[ptr]
            bt_stats[bt] <<< -bytes
            if (@sum(bt_stats[bt]) == 0) {
                delete bt_stats[bt]
            }
        }
    }
}
// Every 10 seconds: raise the quit flag so that the next __libc_malloc return
// in the target prints a report, and print a marker (without a trailing newline).
probe timer.s(10) {
quit = 1
printf("10 s timeout")
}
// Runs when the script shuts down: empties all three tracking arrays.
probe end {
delete ptr2bt
delete ptr2size
delete bt_stats
}
C++ demo代码
//test.cpp
// g++ -g test.cpp -o test
#include <unistd.h>
#include <iostream>
using namespace std;
string testStr;
// Demo leak source: allocates a 10-byte char array and deletes it only when
// i is a multiple of 3; for any other i the array is never released.
void func1(int i) {
char* p = new char[10];
if(i%3==0) delete[] p;
}
// Demo leak source: allocates a 20-byte char array and deletes it only when
// i % 3 == 1; for any other i the array is never released.
void func2(int i) {
char* p = new char[20];
if(i%3==1) delete[] p;
}
// Demo leak source: allocates a 30-byte char array and deletes it only when
// i % 3 == 2; for any other i the array is never released.
void func3(int i) {
char* p = new char[30];
if(i%3==2) delete[] p;
}
// Demo leak source: allocates a 40-byte char array and deletes it only when
// i is a multiple of 3; for any other i the array is never released.
void func4(int i) {
char* p = new char[40];
if(i%3==0) delete[] p;
}
// Demo leak source: allocates a 50-byte char array and deletes it only when
// i % 3 == 1; for any other i the array is never released.
void func5(int i) {
char* p = new char[50];
if(i%3==1) delete[] p;
}
// Appends a 39-character literal to the global testStr on every call, so the
// string keeps growing. At i == 40 one of two reset variants is applied:
//   @1 (active):   clear() empties the string. In the recorded stap trace for
//                  this variant the string's allocation is still outstanding
//                  at the last report.
//   @2 (disabled): swap with an empty local string, so the old buffer belongs
//                  to `temp` and is released when `temp` goes out of scope.
void func6(int i) {
testStr.append("Just test mem leak! just test mem leak!");
if(i==40) testStr.clear(); //@1
//@2
/*if(i==40) {
string temp;
testStr.swap(temp);
}*/
}
// Drives the demo: for i = 0..59 calls func1..func6 with the loop index and
// then sleeps one second, so the program runs for about a minute while the
// stap script emits its periodic reports.
int main() {
for(int i=0;i<60;i++) {
func1(i);
func2(i);
func3(i);
func4(i);
func5(i);
func6(i);
sleep(1);
}
return 0;
}
通过leaks.stp检查运行中test程序的内存泄漏调用栈,命令为
stap -v -d /home/dongsongz/debug/stap/test -d /usr/lib64/libc.so.6 -d /usr/lib64/libstdc++.so.6.0.19 ./leaks.stp -c ./test
其中-d用于指定需要加载符号信息的动态库或可执行程序(解析用户态调用栈时要用到),-c后面跟的是要启动并跟踪的可执行程序命令,该进程的pid即脚本中target()的返回值
stap -v -d /home/dongsongz/debug/stap/test -d /usr/lib64/libc.so.6 -d /usr/lib64/libstdc++.so.6.0.19 ./leaks.stp -c ./test
Pass 1: parsed user script and 478 library scripts using 279156virt/76256res/3488shr/73000data kb, in 460usr/210sys/681real ms.
WARNING: confusing usage, consider @entry($bytes) in .return probe: identifier '$bytes' at ./leaks.stp:29:29
source: ptr2size[ptr] = $bytes
^
WARNING: confusing usage, consider @entry($bytes) in .return probe: identifier '$bytes' at :30:30
source: bt_stats[bt] <<< $bytes
^
Pass 2: analyzed script: 5 probes, 12 functions, 4 embeds, 6 globals using 409788virt/208188res/4808shr/203632data kb, in 750usr/390sys/1144real ms.
Pass 3: translated to C into "/tmp/stapoWbHcD/stap_ee08b54b7f35d04a73950f12cd4af934_7346_src.c" using 409788virt/208552res/5172shr/203632data kb, in 50usr/80sys/137real ms.
Pass 4: compiled C into "stap_ee08b54b7f35d04a73950f12cd4af934_7346.ko" in 2950usr/520sys/3604real ms.
Pass 5: starting run.
WARNING: Start tracing. Wait for 10 sec to complete.
10 s timeout
=================================================================
0x7fa63dd4118d : _Znwm+0x1d/0x90 [/usr/lib64/libstdc++.so.6.0.19]
0x7fa63dd9fcd9 : _ZNSs4_Rep9_S_createEmmRKSaIcE+0x59/0x80 [/usr/lib64/libstdc++.so.6.0.19]
0x7fa63dda08eb : _ZNSs4_Rep8_M_cloneERKSaIcEm+0x1b/0x80 [/usr/lib64/libstdc++.so.6.0.19]
0x7fa63dda0994 : _ZNSs7reserveEm+0x44/0xa0 [/usr/lib64/libstdc++.so.6.0.19]
0x7fa63dda0bff : _ZNSs6appendEPKcm+0x4f/0x100 [/usr/lib64/libstdc++.so.6.0.19]
0x400a05 : _Z5func6i+0x1a/0x2c [/home/dongsongz/debug/stap/test]
0x400a64 : main+0x4d/0x68 [/home/dongsongz/debug/stap/test]
0x7fa63d41e555 : __libc_start_main+0xf5/0x1c0 [/usr/lib64/libc-2.17.so]
0x400799 : _start+0x29/0x30 [/home/dongsongz/debug/stap/test]
649
0x7fa63dd4118d : _Znwm+0x1d/0x90 [/usr/lib64/libstdc++.so.6.0.19]
0x7fa63dd41289 : _Znam+0x9/0x30 [/usr/lib64/libstdc++.so.6.0.19]
0x4009b0 : _Z5func5i+0x15/0x50 [/home/dongsongz/debug/stap/test]
0x400a5a : main+0x43/0x68 [/home/dongsongz/debug/stap/test]
0x7fa63d41e555 : __libc_start_main+0xf5/0x1c0 [/usr/lib64/libc-2.17.so]
0x400799 : _start+0x29/0x30 [/home/dongsongz/debug/stap/test]
350
0x7fa63dd4118d : _Znwm+0x1d/0x90 [/usr/lib64/libstdc++.so.6.0.19]
0x7fa63dd41289 : _Znam+0x9/0x30 [/usr/lib64/libstdc++.so.6.0.19]
0x400961 : _Z5func4i+0x15/0x4f [/home/dongsongz/debug/stap/test]
0x400a50 : main+0x39/0x68 [/home/dongsongz/debug/stap/test]
0x7fa63d41e555 : __libc_start_main+0xf5/0x1c0 [/usr/lib64/libc-2.17.so]
0x400799 : _start+0x29/0x30 [/home/dongsongz/debug/stap/test]
240
=================================================================
.
.此处省略中间打印
.
10 s timeout
=================================================================
0x7fa63dd4118d : _Znwm+0x1d/0x90 [/usr/lib64/libstdc++.so.6.0.19]
0x7fa63dd9fcd9 : _ZNSs4_Rep9_S_createEmmRKSaIcE+0x59/0x80 [/usr/lib64/libstdc++.so.6.0.19]
0x7fa63dda08eb : _ZNSs4_Rep8_M_cloneERKSaIcEm+0x1b/0x80 [/usr/lib64/libstdc++.so.6.0.19]
0x7fa63dda0994 : _ZNSs7reserveEm+0x44/0xa0 [/usr/lib64/libstdc++.so.6.0.19]
0x7fa63dda0bff : _ZNSs6appendEPKcm+0x4f/0x100 [/usr/lib64/libstdc++.so.6.0.19]
0x400a05 : _Z5func6i+0x1a/0x2c [/home/dongsongz/debug/stap/test]
0x400a64 : main+0x4d/0x68 [/home/dongsongz/debug/stap/test]
0x7fa63d41e555 : __libc_start_main+0xf5/0x1c0 [/usr/lib64/libc-2.17.so]
0x400799 : _start+0x29/0x30 [/home/dongsongz/debug/stap/test]
2521
0x7fa63dd4118d : _Znwm+0x1d/0x90 [/usr/lib64/libstdc++.so.6.0.19]
0x7fa63dd41289 : _Znam+0x9/0x30 [/usr/lib64/libstdc++.so.6.0.19]
0x4009b0 : _Z5func5i+0x15/0x50 [/home/dongsongz/debug/stap/test]
0x400a5a : main+0x43/0x68 [/home/dongsongz/debug/stap/test]
0x7fa63d41e555 : __libc_start_main+0xf5/0x1c0 [/usr/lib64/libc-2.17.so]
0x400799 : _start+0x29/0x30 [/home/dongsongz/debug/stap/test]
1650
0x7fa63dd4118d : _Znwm+0x1d/0x90 [/usr/lib64/libstdc++.so.6.0.19]
0x7fa63dd41289 : _Znam+0x9/0x30 [/usr/lib64/libstdc++.so.6.0.19]
0x400961 : _Z5func4i+0x15/0x4f [/home/dongsongz/debug/stap/test]
0x400a50 : main+0x39/0x68 [/home/dongsongz/debug/stap/test]
0x7fa63d41e555 : __libc_start_main+0xf5/0x1c0 [/usr/lib64/libc-2.17.so]
0x400799 : _start+0x29/0x30 [/home/dongsongz/debug/stap/test]
1320
=================================================================
10 s timeoutPass 5: run completed in 0usr/170sys/60522real ms.
可见string底层维护了一块动态内存,在使用过程中,无论是调用clear()方法还是赋值为一个简单的字符串,string都不会释放底层维护的内存块。让string与一个局部的空string对象交换(swap),则可以释放这块内存。我们将demo稍作修改:注释掉@1处的语句,并启用@2处的代码块,重新实验一下。实验结果表明,string在交换后释放了内存,所以最后一次打印的泄漏前三名中已经没有string的调用栈了。
stap -v -d /home/dongsongz/debug/stap/test -d /usr/lib64/libc.so.6 -d /usr/lib64/libstdc++.so.6.0.19 ./leaks.stp -c ./test
Pass 1: parsed user script and 478 library scripts using 279160virt/76260res/3488shr/73004data kb, in 430usr/270sys/712real ms.
WARNING: confusing usage, consider @entry($bytes) in .return probe: identifier '$bytes' at ./leaks.stp:29:29
source: ptr2size[ptr] = $bytes
^
WARNING: confusing usage, consider @entry($bytes) in .return probe: identifier '$bytes' at :30:30
source: bt_stats[bt] <<< $bytes
^
Pass 2: analyzed script: 5 probes, 12 functions, 4 embeds, 6 globals using 409792virt/208196res/4808shr/203636data kb, in 730usr/430sys/1166real ms.
Pass 3: translated to C into "/tmp/stapPMI9g3/stap_95eb7dd4136f7242f6310b79b69bc14f_7346_src.c" using 409792virt/208560res/5172shr/203636data kb, in 50usr/80sys/129real ms.
Pass 4: compiled C into "stap_95eb7dd4136f7242f6310b79b69bc14f_7346.ko" in 2870usr/480sys/3403real ms.
Pass 5: starting run.
WARNING: Start tracing. Wait for 10 sec to complete.
10 s timeout
=================================================================
0x7f567614418d : _Znwm+0x1d/0x90 [/usr/lib64/libstdc++.so.6.0.19]
0x7f56761a2cd9 : _ZNSs4_Rep9_S_createEmmRKSaIcE+0x59/0x80 [/usr/lib64/libstdc++.so.6.0.19]
0x7f56761a38eb : _ZNSs4_Rep8_M_cloneERKSaIcEm+0x1b/0x80 [/usr/lib64/libstdc++.so.6.0.19]
0x7f56761a3994 : _ZNSs7reserveEm+0x44/0xa0 [/usr/lib64/libstdc++.so.6.0.19]
0x7f56761a3bff : _ZNSs6appendEPKcm+0x4f/0x100 [/usr/lib64/libstdc++.so.6.0.19]
0x400af6 : _Z5func6i+0x1b/0x6d [/home/dongsongz/debug/stap/test]
0x400b95 : main+0x4d/0x68 [/home/dongsongz/debug/stap/test]
0x7f5675821555 : __libc_start_main+0xf5/0x1c0 [/usr/lib64/libc-2.17.so]
0x400889 : _start+0x29/0x30 [/home/dongsongz/debug/stap/test]
649
0x7f567614418d : _Znwm+0x1d/0x90 [/usr/lib64/libstdc++.so.6.0.19]
0x7f5676144289 : _Znam+0x9/0x30 [/usr/lib64/libstdc++.so.6.0.19]
0x400aa0 : _Z5func5i+0x15/0x50 [/home/dongsongz/debug/stap/test]
0x400b8b : main+0x43/0x68 [/home/dongsongz/debug/stap/test]
0x7f5675821555 : __libc_start_main+0xf5/0x1c0 [/usr/lib64/libc-2.17.so]
0x400889 : _start+0x29/0x30 [/home/dongsongz/debug/stap/test]
350
0x7f567614418d : _Znwm+0x1d/0x90 [/usr/lib64/libstdc++.so.6.0.19]
0x7f5676144289 : _Znam+0x9/0x30 [/usr/lib64/libstdc++.so.6.0.19]
0x400a51 : _Z5func4i+0x15/0x4f [/home/dongsongz/debug/stap/test]
0x400b81 : main+0x39/0x68 [/home/dongsongz/debug/stap/test]
0x7f5675821555 : __libc_start_main+0xf5/0x1c0 [/usr/lib64/libc-2.17.so]
0x400889 : _start+0x29/0x30 [/home/dongsongz/debug/stap/test]
240
=================================================================
.
.此处省略中间打印
.
10 s timeout
=================================================================
0x7f567614418d : _Znwm+0x1d/0x90 [/usr/lib64/libstdc++.so.6.0.19]
0x7f56761a2cd9 : _ZNSs4_Rep9_S_createEmmRKSaIcE+0x59/0x80 [/usr/lib64/libstdc++.so.6.0.19]
0x7f56761a38eb : _ZNSs4_Rep8_M_cloneERKSaIcEm+0x1b/0x80 [/usr/lib64/libstdc++.so.6.0.19]
0x7f56761a3994 : _ZNSs7reserveEm+0x44/0xa0 [/usr/lib64/libstdc++.so.6.0.19]
0x7f56761a3bff : _ZNSs6appendEPKcm+0x4f/0x100 [/usr/lib64/libstdc++.so.6.0.19]
0x400af6 : _Z5func6i+0x1b/0x6d [/home/dongsongz/debug/stap/test]
0x400b95 : main+0x4d/0x68 [/home/dongsongz/debug/stap/test]
0x7f5675821555 : __libc_start_main+0xf5/0x1c0 [/usr/lib64/libc-2.17.so]
0x400889 : _start+0x29/0x30 [/home/dongsongz/debug/stap/test]
2521
0x7f567614418d : _Znwm+0x1d/0x90 [/usr/lib64/libstdc++.so.6.0.19]
0x7f5676144289 : _Znam+0x9/0x30 [/usr/lib64/libstdc++.so.6.0.19]
0x400aa0 : _Z5func5i+0x15/0x50 [/home/dongsongz/debug/stap/test]
0x400b8b : main+0x43/0x68 [/home/dongsongz/debug/stap/test]
0x7f5675821555 : __libc_start_main+0xf5/0x1c0 [/usr/lib64/libc-2.17.so]
0x400889 : _start+0x29/0x30 [/home/dongsongz/debug/stap/test]
1350
0x7f567614418d : _Znwm+0x1d/0x90 [/usr/lib64/libstdc++.so.6.0.19]
0x7f5676144289 : _Znam+0x9/0x30 [/usr/lib64/libstdc++.so.6.0.19]
0x400a51 : _Z5func4i+0x15/0x4f [/home/dongsongz/debug/stap/test]
0x400b81 : main+0x39/0x68 [/home/dongsongz/debug/stap/test]
0x7f5675821555 : __libc_start_main+0xf5/0x1c0 [/usr/lib64/libc-2.17.so]
0x400889 : _start+0x29/0x30 [/home/dongsongz/debug/stap/test]
1040
=================================================================
10 s timeout
=================================================================
0x7f567614418d : _Znwm+0x1d/0x90 [/usr/lib64/libstdc++.so.6.0.19]
0x7f5676144289 : _Znam+0x9/0x30 [/usr/lib64/libstdc++.so.6.0.19]
0x400aa0 : _Z5func5i+0x15/0x50 [/home/dongsongz/debug/stap/test]
0x400b8b : main+0x43/0x68 [/home/dongsongz/debug/stap/test]
0x7f5675821555 : __libc_start_main+0xf5/0x1c0 [/usr/lib64/libc-2.17.so]
0x400889 : _start+0x29/0x30 [/home/dongsongz/debug/stap/test]
1650
0x7f567614418d : _Znwm+0x1d/0x90 [/usr/lib64/libstdc++.so.6.0.19]
0x7f5676144289 : _Znam+0x9/0x30 [/usr/lib64/libstdc++.so.6.0.19]
0x400a51 : _Z5func4i+0x15/0x4f [/home/dongsongz/debug/stap/test]
0x400b81 : main+0x39/0x68 [/home/dongsongz/debug/stap/test]
0x7f5675821555 : __libc_start_main+0xf5/0x1c0 [/usr/lib64/libc-2.17.so]
0x400889 : _start+0x29/0x30 [/home/dongsongz/debug/stap/test]
1320
0x7f567614418d : _Znwm+0x1d/0x90 [/usr/lib64/libstdc++.so.6.0.19]
0x7f5676144289 : _Znam+0x9/0x30 [/usr/lib64/libstdc++.so.6.0.19]
0x400a01 : _Z5func3i+0x15/0x50 [/home/dongsongz/debug/stap/test]
0x400b77 : main+0x2f/0x68 [/home/dongsongz/debug/stap/test]
0x7f5675821555 : __libc_start_main+0xf5/0x1c0 [/usr/lib64/libc-2.17.so]
0x400889 : _start+0x29/0x30 [/home/dongsongz/debug/stap/test]
1020
=================================================================
10 s timeoutPass 5: run completed in 10usr/180sys/60566real ms.