(一) gperftools-2.0 编译
tar -xzf gperftools-2.0.tar.gz
cd gperftools-2.0
./configure --prefix=/home/wuzhu/tools/gperftools --enable-frame-pointers
make && make install
注:
编译时打开了 --enable-frame-pointers,这要求被测试的程序在编译时也要加上下面的 gcc 编译选项,否则某些多线程程序可能会 core dump:
CCFLAGS=-fno-omit-frame-pointer
(二) google cpu profiler 基本使用
嵌入 google cpu profiler 代码与编译
1) 在我们要测试的程序源码中先 include 头文件,然后在要 profile 的代码前后加上ProfilerStart() 和 ProfilerStop()
其接口声明如下:
1
2
3
4
5
|
/* gperftools 2.x installs its headers under gperftools/; this is the same
 * path used by the example programs later in this article. */
#include <gperftools/profiler.h>

/* Starts CPU profiling; the collected samples are written to the file
 * named by fname. */
int ProfilerStart(const char *fname);

/* Flushes the profile data buffered so far to the output file
 * (per the gperftools header documentation). */
void ProfilerFlush();

/* Stops CPU profiling and finishes writing the profile data file. */
void ProfilerStop();
|
2) 在编译时加上 -fno-omit-frame-pointer 和 链接库 -ltcmalloc_and_profiler
CCFLAGS=-fno-omit-frame-pointer
-ltcmalloc_and_profiler
3) 执行程序,生成 profile 数据文件
4) 分析生成的数据文件
% pprof /bin/ls ls.prof
Enters “interactive” mode
% pprof --text /bin/ls ls.prof
Outputs one line per procedure
% pprof --gv /bin/ls ls.prof
Displays annotated call-graph via ‘gv’
% pprof --gv --focus=Mutex /bin/ls ls.prof
Restricts to code paths including a .*Mutex.* entry
% pprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof
Code paths including Mutex but not string
% pprof --list=getdir /bin/ls ls.prof
(Per-line) annotated source listing for getdir()
% pprof --disasm=getdir /bin/ls ls.prof
(Per-PC) annotated disassembly for getdir()
% pprof --text localhost:1234
Outputs one line per procedure for localhost:1234
% pprof --callgrind /bin/ls ls.prof
Outputs the call information in callgrind format
示例一:
1) cpu_profiler_example.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <gperftools/profiler.h>
using
namespace
std;
/*
 * CPU-bound busy work that serves as the profiling target.
 *
 * For each of 10,000,000 outer rounds it ORs 1,000 small integers into an
 * accumulator, then fills one 1 KiB stack buffer with memset() and copies
 * it into a second one with memcpy(), so that those two library routines
 * show up in the profile as well.
 *
 * Returns the accumulated OR of all (outer % 100 + inner / 100) values.
 */
int loopop()
{
    char src[1024];
    char dst[1024];
    int acc = 0;

    for (int outer = 0; outer < 10000000; outer++)
    {
        for (int inner = 0; inner < 1000; inner++)
        {
            acc |= outer % 100 + inner / 100;
        }

        memset(src, 0xa, 1024);
        memcpy(dst, src, 1024);
    }

    return acc;
}
/*
 * Profiles a single call to loopop().
 *
 * The profile is written to "<argv[0]>_<pid>.prof", so every run produces
 * its own data file.
 */
int main(int argc, char **argv)
{
    char profile_path[1024] = {0};

    /* The limit of 1023 leaves the last byte of the zero-filled buffer
     * untouched. */
    snprintf(profile_path, 1023, "%s_%d.prof", argv[0], getpid());

    ProfilerStart(profile_path);
    printf("result: %d\n", loopop());
    ProfilerStop();

    return 0;
}
|
2) Makefile
# Builds cpu_profiler_example and links it against the combined
# tcmalloc + CPU profiler library of gperftools.

# Installation prefix that was passed to gperftools' ./configure.
GPerfTools=/home/wuzhu/tools/gperftools

# -fno-omit-frame-pointer is required because gperftools itself was
# configured with --enable-frame-pointers.
CCFLAGS=-fno-omit-frame-pointer -g

ALL_BINS=cpu_profiler_example

# "all" and "clean" are commands, not files.
.PHONY: all clean

all:$(ALL_BINS)

cpu_profiler_example :cpu_profiler_example.o
	g++ $(CCFLAGS) -o $@ $^ -L./ -L$(GPerfTools)/lib -Wl,-Bdynamic -ltcmalloc_and_profiler

.cpp.o:
	g++ $(CCFLAGS) -c -I./ -I$(GPerfTools)/include -fPIC -o $@ $<

# Also removes the generated profile data files.
clean:
	rm -f $(ALL_BINS) *.o *.prof
3) 执行
[wuzhu@search041142.sqa.cm4 cpu_profiler]$ ./cpu_profiler_example
result: 127
PROFILE: interrupts/evictions/bytes = 5848/3861/185584
会产生性能数据文件: cpu_profiler_example_29502.prof
4) 分析性能数据
pprof --text cpu_profiler_example cpu_profiler_example_29502.prof
Using local file cpu_profiler_example.
Using local file cpu_profiler_example_29502.prof.
Removing killpg from all stack traces.
Removing main from all stack traces.
Removing __libc_start_main from all stack traces.
Total: 5962 samples
5894 98.9% 98.9% 5894 98.9% loopop
38 0.6% 99.5% 38 0.6% memcpy
28 0.5% 100.0% 28 0.5% memset
2 0.0% 100.0% 2 0.0% _init
注:
更详细的使用方法请见 google cpu profiler wiki:
http://gperftools.googlecode.com/svn/trunk/doc/cpuprofile.html
(三) Google CPU Profiler支持对动态链接库进行性能分析
下面,通过示例2来说明 Google Cpu Profiler 是支持对动态链接库进行性能分析的。
1) 源代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
|
//TestProfiler.h
#ifndef TEST_PROFILER_H
#define TEST_PROFILER_H

/* The linkage specification is only valid in C++; guarding it keeps the
 * header usable from plain C as well. */
#ifdef __cplusplus
extern "C"
{
#endif

/*
 * CPU-bound demo routine implemented in libTestProfiler.so.
 * It has C linkage, so the exported symbol is the unmangled name "loopop",
 * which is the name the dlopen example passes to dlsym().
 */
int loopop(void);

#ifdef __cplusplus
}
#endif

#endif /* TEST_PROFILER_H */
//TestProfiler.cpp只定义了一个耗时计算函数,便于分析。
#include <string.h>
#include "TestProfiler.h"
extern "C"
{

/*
 * CPU-bound busy work exported from libTestProfiler.so with C linkage.
 *
 * Runs 10,000,000 outer rounds; each round ORs 1,000 small integers into
 * an accumulator and then exercises memset() and memcpy() on two 1 KiB
 * stack buffers.
 *
 * Returns the accumulated OR of all (round % 100 + step / 100) values.
 */
int loopop()
{
    char filled[1024];
    char copied[1024];
    int bits = 0;

    for (int round = 0; round < 10000000; round++)
    {
        for (int step = 0; step < 1000; step++)
        {
            bits |= round % 100 + step / 100;
        }

        memset(filled, 0xa, 1024);
        memcpy(copied, filled, 1024);
    }

    return bits;
}

}
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <gperftools/profiler.h>
#include "TestProfiler.h"
using
namespace
std;
/*
 * Profiles a single call to loopop(), which this program obtains from the
 * dynamically linked libTestProfiler.so.
 *
 * The profile is written to "<argv[0]>_<pid>.prof".
 */
int main(int argc, char **argv)
{
    char prof_file[1024] = {0};

    /* The limit of 1023 leaves the last byte of the zero-filled buffer
     * untouched. */
    snprintf(prof_file, 1023, "%s_%d.prof", argv[0], getpid());

    ProfilerStart(prof_file);
    printf("result: %d\n", loopop());
    ProfilerStop();

    return 0;
}
|
2) Makefile
# Builds libTestProfiler.so and main_dynamic_link, which links against it
# and against the combined tcmalloc + CPU profiler library of gperftools.

# Installation prefix that was passed to gperftools' ./configure.
GPerfTools=/home/wuzhu/tools/gperftools

# -fno-omit-frame-pointer is required because gperftools itself was
# configured with --enable-frame-pointers.
CCFLAGS=-fno-omit-frame-pointer -g

ALL_BINS=libTestProfiler.so main_dynamic_link

# "all" and "clean" are commands, not files.
.PHONY: all clean

all:$(ALL_BINS)

# libTestProfiler.so is a prerequisite so that it always exists before the
# link step runs (the original relied on the order of ALL_BINS, which is
# not guaranteed under "make -j"). Only the object file is passed to the
# linker explicitly; the library is still resolved through -lTestProfiler.
main_dynamic_link :main_dynamic_link.o libTestProfiler.so
	g++ $(CCFLAGS) -o $@ main_dynamic_link.o -L./ -L$(GPerfTools)/lib -Wl,-Bdynamic -lTestProfiler -ltcmalloc_and_profiler

libTestProfiler.so:TestProfiler.o
	g++ -shared $(CCFLAGS) -o $@ $^

.cpp.o:
	g++ $(CCFLAGS) -c -I./ -I$(GPerfTools)/include -fPIC -o $@ $<

clean:
	rm -f $(ALL_BINS) *.o *.prof
3) 执行
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`
./main_dynamic_link
result: 127
PROFILE: interrupts/evictions/bytes = 5991/3915/188368
产生性能数据文件: main_dynamic_link_24327.prof
4) 分析性能数据
pprof --text main_dynamic_link main_dynamic_link_24327.prof
Using local file main_dynamic_link.
Using local file main_dynamic_link_24327.prof.
Removing killpg from all stack traces.
Removing main from all stack traces.
Removing __libc_start_main from all stack traces.
Total: 5991 samples
5899 98.5% 98.5% 5899 98.5% loopop
53 0.9% 99.3% 53 0.9% memcpy
38 0.6% 100.0% 38 0.6% memset
1 0.0% 100.0% 1 0.0% _init
由此证明,Google CPU Profiler支持对动态链接库的性能分析
(四) Google CPU Profiler 对用 dlopen 方式打开动态库的程序支持
运行时加载允许程序可以有选择地调用库中的函数。使用动态加载过程,程序可以先加载一个特定的库(已加载则不必),然后调用该库中的某一特定函数,这是构建支持插件的应用程序的一个普遍的方法。
示例 3 主要演示 Google CPU Profiler 是否支持对用 dlopen 方式打开动态库的程序进行性能分析。
还是以上述程序为例,对主程序代码进行修改。
1) 源代码 (libTestProfiler.so 的源码同示例 2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <gperftools/profiler.h>
typedef
int
(*op_t) ();
int
main(
int
argc,
char
** argv)
{
int
not_close=0;
if
(argc >= 2)
not_close=
atoi
(argv[1]);
void
* dl_handle=NULL;
op_t loopop;
char
* error=NULL;
char
program[1024]={0};
snprintf(program,1023,
"%s_%d.prof"
,argv[0],getpid());
ProfilerStart(program);
dl_handle = dlopen(
"./libTestProfiler.so"
, RTLD_LAZY );
if
(!dl_handle)
{
printf
(
"dlopen failed! %s\n"
, dlerror() );
return
-1;
}
loopop = (op_t)dlsym( dl_handle,
"loopop"
);
error = dlerror();
if
(error != NULL)
{
printf
(
"dlsym failed! %s\n"
, error );
return
-1;
}
printf
(
"result: %d\n"
, (loopop)() );
if
(not_close == 0)
{
printf
(
"do dlclose()\n"
);
dlclose( dl_handle );
}
else
{
printf
(
"not dlclose()\n"
);
}
ProfilerStop();
return
0;
}
|
2) Makefile
# Builds libTestProfiler.so and main_dlopen_link. The program loads the
# library with dlopen() at run time, so it links against -ldl and the
# gperftools library only, not against libTestProfiler.so.

# Installation prefix that was passed to gperftools' ./configure.
GPerfTools=/home/wuzhu/tools/gperftools

# -fno-omit-frame-pointer is required because gperftools itself was
# configured with --enable-frame-pointers.
CCFLAGS=-fno-omit-frame-pointer -g

ALL_BINS=libTestProfiler.so main_dlopen_link

# "all" and "clean" are commands, not files.
.PHONY: all clean

all:$(ALL_BINS)

main_dlopen_link :main_dlopen_link.o
	g++ $(CCFLAGS) -o $@ $^ -L./ -L$(GPerfTools)/lib -Wl,-Bdynamic -ltcmalloc_and_profiler -ldl

libTestProfiler.so:TestProfiler.o
	g++ -shared $(CCFLAGS) -o $@ $^

.cpp.o:
	g++ $(CCFLAGS) -c -I./ -I$(GPerfTools)/include -fPIC -o $@ $<

clean:
	rm -f $(ALL_BINS) *.o *.prof
3) 运行
[wuzhu@search041142.sqa.cm4 cpu_profiler]$ ./main_dlopen_link
result: 127
do dlclose()
PROFILE: interrupts/evictions/bytes = 5984/3559/171280
产生性能分析数据:main_dlopen_link_1256.prof
4) 分析性能数据
pprof --text main_dlopen_link main_dlopen_link_13598.prof
Using local file main_dlopen_link.
Using local file main_dlopen_link_13598.prof.
Removing killpg from all stack traces.
Removing main from all stack traces.
Removing __libc_start_main from all stack traces.
Total: 5984 samples
801 13.4% 13.4% 801 13.4% 0x00002b124c0b4688
736 12.3% 25.7% 736 12.3% 0x00002b124c0b4652
735 12.3% 38.0% 735 12.3% 0x00002b124c0b4685
425 7.1% 45.1% 425 7.1% 0x00002b124c0b4671
414 6.9% 52.0% 414 6.9% 0x00002b124c0b464b
412 6.9% 58.9% 412 6.9% 0x00002b124c0b4676
401 6.7% 65.6% 401 6.7% 0x00002b124c0b4656
401 6.7% 72.3% 401 6.7% 0x00002b124c0b465c
392 6.6% 78.8% 392 6.6% 0x00002b124c0b4624
387 6.5% 85.3% 387 6.5% 0x00002b124c0b463e
382 6.4% 91.7% 382 6.4% 0x00002b124c0b467f
368 6.1% 97.8% 368 6.1% 0x00002b124c0b4639
45 0.8% 98.6% 45 0.8% memcpy
42 0.7% 99.3% 42 0.7% memset
21 0.4% 99.6% 21 0.4% 0x00002b124c0b468c
9 0.2% 99.8% 9 0.2% 0x00002b124c0b4645
3 0.1% 99.8% 3 0.1% 0x00002b124c0b467d
3 0.1% 99.9% 3 0.1% 0x00002b124c0b469c
2 0.0% 99.9% 2 0.0% 0x00002b124c0b46a6
1 0.0% 99.9% 1 0.0% 0x00002b124c0b4518
1 0.0% 99.9% 1 0.0% 0x00002b124c0b461b
1 0.0% 100.0% 1 0.0% 0x00002b124c0b4643
1 0.0% 100.0% 1 0.0% 0x00002b124c0b4695
1 0.0% 100.0% 1 0.0% 0x00002b124c0b46d4
很奇怪,这个结果显示 libTestProfiler.so 库中的符号没有正确解析,main_dlopen_link_1256.prof 文件也没有包含 libTestProfiler.so 的内存映射信息,但是我们确实在主程序已经通过dlopen将动态库装载到内存并执行成功了,为何在主程序的内存映射表中找不到动态库的信息呢?
经过一番分析和调查,终于找到原因,因为main_dlopen_link_1256.prof 文件的输出工作是在dlclose()函数执行之后调用的,而在此时主程序执行了dlclose()函数卸载了libTestProfiler.so,所以随后dump出的内存映射表当然就不会包含libTestProfiler.so的信息了。
下面,我们通过传入参数,禁止dlclose()
[wuzhu@search041142.sqa.cm4 cpu_profiler]$ ./main_dlopen_link 1
result: 127
not dlclose()
PROFILE: interrupts/evictions/bytes = 5968/3520/169408
pprof --text main_dlopen_link main_dlopen_link_18394.prof
Using local file main_dlopen_link.
Using local file main_dlopen_link_18394.prof.
Removing killpg from all stack traces.
Removing main from all stack traces.
Removing __libc_start_main from all stack traces.
Total: 5968 samples
5893 98.7% 98.7% 5893 98.7% loopop
46 0.8% 99.5% 46 0.8% memcpy
27 0.5% 100.0% 27 0.5% memset
2 0.0% 100.0% 2 0.0% _init
此时,动态库中的符号又能正常解析了。
(五) GoogleProfiler.h 小工具
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
#ifndef __GOOGLE_PROFILER__
#define __GOOGLE_PROFILER__

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>    // system()
#include <sys/types.h> // ssize_t
#include <unistd.h>    // readlink(), getpid()
#include <string>
#ifdef CPU_PROFILER
#include <gperftools/profiler.h>
#endif

/**
 * Convenience wrapper around the gperftools CPU profiler.
 *
 * - Names the profile file automatically: "./<program name>_<pid>.prof".
 * - After profiling stops, runs "pprof --callgrind" to convert the profile
 *   into "<program name>_<pid>.callgrind".
 * - All profiler calls are compiled in only when CPU_PROFILER is defined;
 *   without it every method is an empty stub, so callers need no #ifdef.
 *
 * The destructor calls ProfilerStop(), so a running profile is finalized
 * when the object goes out of scope.
 */
class GoogleProfiler
{
public:
    enum
    {
        MAXBUFSIZE = 1024 ///< Size of the path buffers used by ProfilerStart().
    };

public:
    GoogleProfiler() : _ready(false), _pid(0)
    {
    }

    ~GoogleProfiler()
    {
        ProfilerStop();
    }

    /**
     * Resolves the running executable via /proc/self/exe and starts the
     * CPU profiler, writing to "./<program name>_<pid>.prof".
     *
     * Does nothing if the executable path cannot be read or does not fit
     * into MAXBUFSIZE - 1 bytes.
     */
    void ProfilerStart()
    {
#ifdef CPU_PROFILER
        char buf[MAXBUFSIZE] = {0};

        // readlink() does not append a terminating NUL. Rejecting a result
        // that fills the whole buffer keeps buf NUL-terminated (it was
        // zero-filled above) and also rejects a possibly truncated path.
        ssize_t count = readlink("/proc/self/exe", buf, MAXBUFSIZE);
        if (count < 0 || count >= MAXBUFSIZE)
        {
            return;
        }

        _programBin.assign(buf, static_cast<size_t>(count));
        printf("ProgramBin=%s\n", _programBin.c_str());
        _pid = getpid();

        // Strip the directory part to obtain the bare program name.
        size_t pos = _programBin.find_last_of('/');
        if (pos != std::string::npos)
        {
            _programName = _programBin.substr(pos + 1);
        }
        else
        {
            _programName = _programBin;
        }

        // buf still holds the executable path at this point.
        printf("ProfilerStart(%s)\n", buf);

        snprintf(buf, MAXBUFSIZE, "./%s_%d.prof", _programName.c_str(), _pid);

        // ::ProfilerStart() returns non-zero when profiling actually
        // started; only then is there a profile to convert later.
        _ready = (::ProfilerStart(buf) != 0);
#endif
    }

    /**
     * Stops the profiler (if this object started it) and converts the
     * profile to callgrind format by invoking pprof through the shell.
     * Calling it again, or without a prior successful ProfilerStart(),
     * is a no-op.
     */
    void ProfilerStop()
    {
#ifdef CPU_PROFILER
        if (_ready)
        {
            ::ProfilerStop();

            // NOTE(review): the paths are inserted into the shell command
            // unquoted, so an executable path containing spaces or shell
            // metacharacters will break this command.
            char cmd[2048] = {0};
            snprintf(cmd, 2047,
                     "pprof --callgrind %s %s_%d.prof >%s_%d.callgrind",
                     _programBin.c_str(), _programName.c_str(), _pid,
                     _programName.c_str(), _pid);
            printf("cmd=%s\n", cmd);
            if (system(cmd) != 0)
            {
                fprintf(stderr, "pprof conversion failed: %s\n", cmd);
            }
            _ready = false;
        }
#endif
    }

    /**
     * Flushes the profile data buffered so far. Compiled to an empty stub
     * when CPU_PROFILER is not defined, because ::ProfilerFlush() is only
     * declared when the gperftools header is included.
     */
    void ProfilerFlush()
    {
#ifdef CPU_PROFILER
        ::ProfilerFlush();
#endif
    }

private:
    bool _ready;              ///< True while a profile started by this object is running.
    int _pid;                 ///< Process id used in the generated file names.
    std::string _programBin;  ///< Full path of the running executable.
    std::string _programName; ///< Executable name without its directory.
};

#endif
|
该小工具主要是提供如下功能:
自动生成性能数据文件的名称:程序名_进程号.prof
将性能数据文件自动转换成 .callgrind 格式,方便 kcachegrind 上展示
提供宏定义,可通过是否传 -DCPU_PROFILER 宏来打开或关闭google cpu profiler 性能采集.
下面是使用示例:
Main 程序
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <gperftools/profiler.h>
#include "GoogleProfiler.h"
/* Signature of the loopop() entry point looked up in the shared library. */
typedef int (*op_t)();

/*
 * Same dlopen() demo as before, but driven through the GoogleProfiler
 * helper instead of calling the profiler API directly.
 *
 * argv[1] (optional integer): 0 or absent unloads the library with
 * dlclose() before the profiler is stopped; any other value keeps it
 * loaded.
 *
 * Returns 0 on success, -1 if the library or the symbol cannot be loaded.
 */
int main(int argc, char **argv)
{
    const int keep_loaded = (argc >= 2) ? atoi(argv[1]) : 0;

    GoogleProfiler profiler;
    profiler.ProfilerStart();

    void *lib = dlopen("./libTestProfiler.so", RTLD_LAZY);
    if (lib == NULL)
    {
        printf("dlopen failed! %s\n", dlerror());
        return -1;
    }

    op_t entry = (op_t)dlsym(lib, "loopop");
    char *dl_err = dlerror();
    if (dl_err != NULL)
    {
        printf("dlsym failed! %s\n", dl_err);
        return -1;
    }

    printf("result: %d\n", entry());

    if (keep_loaded != 0)
    {
        printf("not dlclose()\n");
    }
    else
    {
        printf("do dlclose()\n");
        dlclose(lib);
    }

    profiler.ProfilerStop();
    return 0;
}
|
Makefile
# Builds libTestProfiler.so and main_dlopen_link2, the dlopen() demo that
# uses the GoogleProfiler.h helper.

# Installation prefix that was passed to gperftools' ./configure.
GPerfTools=/home/wuzhu/tools/gperftools

# -DCPU_PROFILER enables the profiler calls inside GoogleProfiler.h;
# -fno-omit-frame-pointer is required because gperftools itself was
# configured with --enable-frame-pointers.
CCFLAGS=-fno-omit-frame-pointer -g -DCPU_PROFILER

ALL_BINS=libTestProfiler.so \
main_dlopen_link2

# "all" and "clean" are commands, not files.
.PHONY: all clean

all:$(ALL_BINS)

main_dlopen_link2 :main_dlopen_link2.o
	g++ $(CCFLAGS) -o $@ $^ -L./ -L$(GPerfTools)/lib -Wl,-Bdynamic -ltcmalloc_and_profiler -ldl

libTestProfiler.so:TestProfiler.o
	g++ -shared $(CCFLAGS) -o $@ $^

.cpp.o:
	g++ $(CCFLAGS) -c -I./ -I$(GPerfTools)/include -fPIC -o $@ $<

clean:
	rm -f $(ALL_BINS) *.o *.prof
执行
[wuzhu@search041142.sqa.cm4 cpu_profiler]$ ./main_dlopen_link2 1
ProgramBin=/home/wuzhu/develop/mytest/cpu_profiler/main_dlopen_link2
ProfilerStart(/home/wuzhu/develop/mytest/cpu_profiler/main_dlopen_link2)
result: 127
not dlclose()
PROFILE: interrupts/evictions/bytes = 5962/3555/171088
cmd=pprof --callgrind /home/wuzhu/develop/mytest/cpu_profiler/main_dlopen_link2 main_dlopen_link2_24515.prof >main_dlopen_link2_24515.callgrind
Using local file /home/wuzhu/develop/mytest/cpu_profiler/main_dlopen_link2.
Using local file main_dlopen_link2_24515.prof.
Removing killpg from all stack traces.
Removing main from all stack traces.
Removing __libc_start_main from all stack traces.
[wuzhu@search041142.sqa.cm4 cpu_profiler]$
生成 main_dlopen_link2_24515.prof 和 main_dlopen_link2_24515.callgrind
(六) 参考
http://www.cnblogs.com/lenolix/archive/2010/12/13/1904868.html
http://gperftools.googlecode.com/svn/trunk/doc/cpuprofile.html