最近由于项目需求,需要将Oprofile移植到海思3516平台上。Oprofile是一款比较好用的开源的性能分析工具。它通过采样CPU来计算程序中每个部分占用的时间。应该说是比较准确的。
在安装oprofile之前,需要保证系统支持oprofile,即在编译内核的时候要将以下两项勾上:
make menuconfig
General setup --->
[*] Profiling support
<*> OProfile system profiling
Oprofile 需要popt库和binutils库的支持,因此在安装Oprofile 之前,要先安装popt和binutils。
在安装之前需要设定几个环境变量
export CC=arm-none-linux-gnueabi-gcc //修改成你的编译器
export CXX=arm-none-linux-gnueabi-g++ //修改成你的编译器
export CFLAGS=-static
export CXXFLAGS=-static
export CPPFLAGS=-static
export ac_cv_va_copy=C99
一、安装popt库
下载地址:http://download.chinaunix.net/download.php?id=16763&ResourceID=8268
下载完成后,解压:
tar zxvf popt-1.7.tar.gz
然后到解压出的目录中执行以下命令:
./configure --with-kernel-support --target=<arm-hisiv100-linux> --host=<arm-hisiv100-linux> --prefix=</home/XXX/oprofile> && make && make install
--target 和 --host 都是你系统的名称。--prefix 是你要库的生成目录。 --with-kernel-support是在2.6的核才可以这样,不需要核的源码。在2.4的核上时,则需要指定--with-kernel=<核的源码路径>
编译完成之后,将生成的include和lib目录下的文件分别拷贝到交叉编译器的include和lib目录下。这样popt也就安装完成了。
二、安装binutils
下载地址:http://ftp.gnu.org/gnu/binutils/
下载完成后,解压:
tar zxvf binutils-2.20.1.tar.gz
然后在目录中执行以下命令:
./configure --with-kernel-support --target=arm-hisiv100-linux --host=arm-hisiv100-linux --enable-install-libbfd --prefix=/home/xxx/oprofile/binutils --disable-nls && make && make install
--enable-install-libbfd 是为了安装bfd库,因为在默认情况下,binutils 是不安装bfd库的。这样会导致后面编译Oprofile 的时候出错
--disable-nls ,如果不增加这个选项,那么在后面编译的时候就会增加对/devel/gettext库的依赖。可能会导致后面Oprofile的编译出错。
三、安装oprofile
下载地址:http://oprofile.sourceforge.net/news/
解压:
tar zxvf oprofile-0.9.6.tar.gz
如果你是2.6的核,那么在编译之前你还需要修改几处代码,以消除对2.4核的支持(不然后面会出错的)。主要有以下几个地方(原文中需要删除的部分以红色标出,即下面与 liblegacy 以及 2.4 内核接口 OP_INTERFACE_24 / opd_24_ops 相关的内容)。
./daemon/Makefile.am
Line 1:
SUBDIRS = liblegacy .
Line 49:
bin_PROGRAMS = oprofiled
oprofiled_LDADD = \
liblegacy/liblegacy.a \
../libabi/libabi.a \
../libdb/libodb.a \
../libop/libop.a \
../libutil/libutil.a
./daemon/Makefile.in
Line 73:
oprofiled_DEPENDENCIES = liblegacy/liblegacy.a ../libabi/libabi.a \
Line 239:
SUBDIRS = liblegacy .
Line 284:
oprofiled_LDADD = \
liblegacy/liblegacy.a \
../libabi/libabi.a \
../libdb/libodb.a \
../libop/libop.a \
../libutil/libutil.a
./daemon/oprofiled.c
Line 80:
extern struct oprofiled_ops opd_24_ops;
Line 480:
switch (op_get_interface()) {
case OP_INTERFACE_24:
printf("Using 2.4 OProfile kernel interface.\n");
return &opd_24_ops;
case OP_INTERFACE_26:
printf("Using 2.6+ OProfile kernel interface.\n");
return &opd_26_ops;
主要就是以上的地方,可能由于版本不同等原因,行号等可能有变化。
修改完成后输入以下命令:
./configure --with-kernel-support --target=arm-hisiv100-linux --host=arm-hisiv100-linux --with-binutils=/home/xxx/oprofile/binutils --prefix=/home/xxx/oprofile/oprofile && make && make install
命令执行完成之后,就在oprofile目录下生成了oprofile的文件。将这些文件拷贝到嵌入式平台上,至此,oprofile就移植成功了。
+++++++++++++++++++++++++++=
3.2版本中出现错误
18:30: error: linux/perf_event.h: No such file or directory
下载一个perf_event.h
添加__NR_perf_event_open 298
从Linux Kernel2.6.31版本开始,Linux内核开始提供一个叫__NR_perf_counter_open(最新的版本里叫__NR_perf_event_open)的系统调用。使用这个系统调用我们可以像使用文件一样打开一个Performance counter,通过设置不同的参数让这个Performance Counter统计不同的软件或硬件事件,然后就可以像读文件一样来读取这些事件的统计结果。比如我可以打开一个Performance Counter统计某一个进程的CPU Cache Miss次数。关于如何传递参数构造Performance Counter来统计不同的事件可以看这篇日志: http://tblog29.appspot.com/blog/1004
下面是我写的一个小程序,它为每个CPU和每个进程开一个Performance Counter,统计每个CPU上的Cache miss和每个进程上的Cache miss(不能统计每个进程在单个CPU上的事件,详见上边那篇日志)。本代码参考了 perf 的stat部分。运行需要CAP_SYS_ADMIN权限
1 eperf.h
- /*
- * eperf.h
- *
- * Created on: Jan 28, 2010
- * Author: hchen
- */
- #ifndef EPERF_H_
- #define EPERF_H_
- #include <time.h>
- #include <asm/unistd.h>
- #include "perf_event.h"
- /* Number of entries in the descriptor table that eperf.c declares as fd[MAX_COUNTERS]. */
- #define MAX_COUNTERS 256
- /* CPU-count limit; not referenced by the code in this listing. */
- #define MAX_NR_CPUS 32
- /* Directory that main() opens and scans for entries whose name starts with a digit. */
- #define PROC "/proc"
- /*
- * We define u64 as unsigned long long for every architecture
- * so that we can print it with %Lx without getting warnings.
- * The remaining aliases give short names to the other signed and
- * unsigned integer types.
- */
- typedef unsigned long long u64;
- typedef signed long long s64;
- typedef unsigned int u32;
- typedef signed int s32;
- typedef unsigned short u16;
- typedef signed short s16;
- typedef unsigned char u8;
- typedef signed char s8;
- static inline int
- sys_perf_event_open(struct perf_event_attr *attr,
- pid_t pid, int cpu, int group_fd,
- unsigned long flags)
- {
- attr->size = sizeof(*attr);
- //This system call is defined in asm/unistd.h, in the latest linux kernel
- //it's name has been changed to __NR_perf_event_open .
- return syscall(__NR_perf_counter_open, attr, pid, cpu, group_fd, flags);
- }
- #endif /* EPERF_H_ */
2 eperf.c
- #include <time.h>
- #include <unistd.h>
- #include <stdlib.h>
- #include <stdio.h>
- #include <asm/unistd.h>
- #include <dirent.h>
- #include "eperf.h"
- /* Set from argv[1] in main(); when nonzero, read_counter() prints each value it reads. */
- unsigned int verbose = 0;
- /* Event to be counted: hardware cache misses. Only the first entry is used. */
- static struct perf_event_attr attrs[] = {
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }
- };
- /* Number of per-process counter slots; assigned by main() after scanning /proc. */
- int nr_counters = 0;
- static unsigned int nr_cpus = 0; // number of CPUs, filled in by main() from sysconf()
- /* Copied into attr.inherit for per-process counters. */
- static int inherit = 1;
- /* When nonzero, counters also report enabled/running times and each read fetches three u64 values. */
- static int scale = 1;
- /*
- * Descriptor table for the opened counters: entries [0, nr_cpus) hold the
- * per-CPU counters, and entries from nr_cpus onward hold the per-process ones.
- */
- static int fd[MAX_COUNTERS];
- /*
- * Fetch the current value of one counter slot and report it.
- *
- * counter   index into fd[]. Slots whose descriptor is <= 0 are skipped
- *           silently.
- *
- * One u64 is read, or three when `scale` is set. Only the first value is
- * printed, and only when `verbose` is nonzero. A read that does not return
- * exactly the requested number of bytes is reported on stderr.
- */
- static void read_counter(int counter)
- {
-     u64 values[3];
-     size_t want, got;
-     if (fd[counter] > 0) {
-         want = (scale ? 3 : 1) * sizeof(u64);
-         got = read(fd[counter], values, want);
-         if (got != want)
-             fprintf(stderr, "Fail to read counter %d\n", counter);
-         else if (verbose)
-             printf("Counter %d: %llu\n", counter, values[0]);
-     }
- }
- /*
- * Close every open descriptor in the first nr_cpus + nr_counters slots of
- * fd[] and mark each closed slot with -1. Slots whose value is <= 0 are
- * left untouched.
- */
- void close_all_counters(){
-     int total = nr_cpus + nr_counters;
-     int slot = 0;
-     while (slot < total) {
-         if (fd[slot] > 0) {
-             close(fd[slot]);
-             fd[slot] = -1;
-         }
-         slot++;
-     }
- }
- /*
- * Take one sample: call read_counter() for each of the first
- * nr_cpus + nr_counters slots, in ascending order.
- */
- void run_perf_stat()
- {
-     int total = nr_cpus + nr_counters;
-     int slot = 0;
-     while (slot < total)
-         read_counter(slot++);
- }
- /*
- * Open cache-miss counters and store their descriptors in fd[].
- *
- * counter       index of the per-process counter to create; ignored when
- *               system_wide is nonzero.
- * pid           process to attach to; ignored when system_wide is nonzero.
- * system_wide   nonzero: open one counter per CPU into fd[0 .. nr_cpus-1].
- *               zero: open one counter for `pid` into fd[nr_cpus + counter].
- *
- * The value stored is whatever sys_perf_event_open() returns, so a failed
- * open leaves -1 in the slot. A request whose slot would fall outside
- * fd[MAX_COUNTERS] is ignored instead of writing past the table.
- */
- static void create_perf_stat_counter(int counter, int pid, int system_wide)
- {
-     /* Start from the template event (cache misses); plain struct copy. */
-     struct perf_event_attr attr = attrs[0];
-     unsigned int cpu;
-     if (scale)
-         attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
-                            PERF_FORMAT_TOTAL_TIME_RUNNING;
-     if (system_wide) {
-         /* pid == -1 with an explicit cpu: count everything on that CPU. */
-         for (cpu = 0; cpu < nr_cpus && cpu < MAX_COUNTERS; cpu++)
-             fd[cpu] = sys_perf_event_open(&attr, -1, cpu, -1, 0);
-         return;
-     }
-     /* Per-process slots live after the per-CPU ones; reject out-of-range slots. */
-     if (counter < 0 || nr_cpus >= MAX_COUNTERS ||
-         (unsigned int)counter >= MAX_COUNTERS - nr_cpus)
-         return;
-     attr.inherit = inherit;
-     attr.disabled = 0;
-     attr.enable_on_exec = 1;
-     /* cpu == -1 with an explicit pid: follow the process on any CPU. */
-     fd[nr_cpus + (unsigned int)counter] = sys_perf_event_open(&attr, pid, -1, -1, 0);
- }
- /*
- * Entry point: open one cache-miss counter per online CPU and one per
- * process found in /proc, then sample all of them once per second.
- *
- * argv[1] (optional) sets `verbose`; a nonzero value makes every sample
- * print the counter values.
- *
- * Exits with status 4 if the CPU count cannot be determined or /proc cannot
- * be opened. Otherwise the sampling loop runs until the process is killed.
- */
- int main(int argc, const char **argv)
- {
-     DIR *dir;
-     struct dirent *drp;
-     struct timespec tim;
-     long online;
-     int p, pid;
-     int counter = 0;
-     if (argc > 1)
-         verbose = atoi(argv[1]);
-     tim.tv_sec = 1;
-     tim.tv_nsec = 0;
-     /* sysconf() returns -1 on failure; never store that in the unsigned nr_cpus. */
-     online = sysconf(_SC_NPROCESSORS_ONLN);
-     if (online < 1) {
-         fprintf(stderr, "cannot determine the number of online CPUs\n");
-         exit(4);
-     }
-     if (online > MAX_COUNTERS) {
-         fprintf(stderr, "only the first %d CPUs will be monitored\n", MAX_COUNTERS);
-         online = MAX_COUNTERS;
-     }
-     nr_cpus = (unsigned int)online;
-     /* Open /proc directory */
-     if ((dir = opendir(PROC)) == NULL) {
-         perror("opendir /proc error!");
-         exit(4);
-     }
-     /* Create the per-CPU counters; they occupy fd[0 .. nr_cpus-1]. */
-     create_perf_stat_counter(-1, 1, 1);
-     /* Try at most 254 processes, and stop early once the table is full. */
-     for (p = 0; p < 254; p++) {
-         if (nr_cpus + (unsigned int)counter >= MAX_COUNTERS)
-             break;
-         /* Advance to the next entry whose name starts with a digit (a pid). */
-         do {
-             drp = readdir(dir);
-         } while (drp != NULL &&
-                  !(drp->d_name[0] >= '0' && drp->d_name[0] <= '9'));
-         if (drp == NULL)
-             break; /* end of /proc: nothing more to scan */
-         pid = atoi(drp->d_name);
-         create_perf_stat_counter(counter, pid, 0);
-         /* The descriptor was stored after the per-CPU slots; keep the slot only on success. */
-         if (fd[nr_cpus + (unsigned int)counter] != -1)
-             counter++;
-     }
-     /* `counter` is the number of per-process counters opened successfully. */
-     nr_counters = counter;
-     /* Close /proc directory */
-     closedir(dir);
-     /* Sample once per second until the process is killed. */
-     while (1) {
-         nanosleep(&tim, NULL);
-         run_perf_stat();
-     }
-     /* Not reached while the loop above is unconditional. */
-     close_all_counters();
-     return 1;
- }