perf: perf stat user-level code analysis

perf_event source code analysis

Preface

In short, perf is a performance monitoring tool. It first programs the performance counters provided by the processor, configuring the event to count and the counter threshold; the counter is then incremented each time the configured event occurs, until its value reaches the threshold. How the counter value is collected differs between architectures. On MIPS, a hardware interrupt is registered, so a hardware interrupt fires when the counter overflows and the value is recorded in the interrupt handler. On x86, the notifier-chain mechanism is used instead: the overflow handler is registered on the die_chain notifier, and it uses the occurrence of any hardware interrupt as an opportunity to check whether a performance counter has overflowed and, if so, to record the value. This implementation avoids registering a separate hardware interrupt just for performance-counter overflow.

The perf source is split into a user-level part and a kernel-level part. The user-level code gives the user a command line for specifying events and sampling modes, and this rich set of user-level tools is one of perf's defining features; one could say the kernel code only provides the sampling engine, while the user level is where the essence of perf lies. The user-level code lives under tools/perf and amounts to roughly 13,000 lines of C, plus a large number of scripts. The kernel-level code is split into architecture-independent code (under kernel/events/) and architecture-dependent code (for x86, under arch/x86/kernel/cpu/).

Here is the framework for this series: we start with system boot and initialization, i.e. the perf init related work; then we look at how the user-level tool specifies events, enters the kernel through the system call, performs the sampling, returns the sample data to user space via a memory mapping, and finally how the user-level tools do the higher-level analysis and display.
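
Before diving into the perf tool itself, it may help to see what the whole pipeline boils down to at the syscall level. The following is a minimal, self-contained sketch (not taken from the perf sources; it only uses the documented perf_event_open() interface) that programs one hardware counter, runs some work, and reads the count back, which is essentially what perf stat automates for every event, CPU and thread:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* Thin wrapper: glibc does not export perf_event_open() directly. */
static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
                           int cpu, int group_fd, unsigned long flags)
{
    return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
    struct perf_event_attr attr;
    long long count;
    int fd;

    memset(&attr, 0, sizeof(attr));
    attr.type = PERF_TYPE_HARDWARE;          /* generic hardware event */
    attr.size = sizeof(attr);
    attr.config = PERF_COUNT_HW_CPU_CYCLES;  /* count CPU cycles */
    attr.disabled = 1;                       /* start disabled, enable explicitly */
    attr.exclude_kernel = 1;                 /* count user space only */

    fd = perf_event_open(&attr, 0 /* this thread */, -1 /* any cpu */, -1, 0);
    if (fd < 0) {
        perror("perf_event_open");
        return 1;
    }

    ioctl(fd, PERF_EVENT_IOC_RESET, 0);
    ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);

    for (volatile int i = 0; i < 1000000; i++)   /* the "workload" */
        ;

    ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
    read(fd, &count, sizeof(count));
    printf("cycles: %lld\n", count);
    close(fd);
    return 0;
}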

perf_event源码分析(一)——cmd_record

perf's main entry

tools/perf/perf.c

static struct cmd_struct commands[] = {
    { "buildid-cache", cmd_buildid_cache, 0 },
    { "buildid-list", cmd_buildid_list, 0 },
    { "diff",   cmd_diff,   0 },
    { "evlist", cmd_evlist, 0 },
    { "help",   cmd_help,   0 },
    { "list",   cmd_list,   0 },
    { "record", cmd_record, 0 },
    { "report", cmd_report, 0 },
    { "bench",  cmd_bench,  0 },
    { "stat",   cmd_stat,   0 },
    { "timechart",  cmd_timechart,  0 },
    { "top",    cmd_top,    0 },
    { "annotate",   cmd_annotate,   0 },
    { "version",    cmd_version,    0 },
    { "script", cmd_script, 0 },
    { "sched",  cmd_sched,  0 },
#ifdef HAVE_LIBELF_SUPPORT
    { "probe",  cmd_probe,  0 },
#endif
    { "kmem",   cmd_kmem,   0 },
    { "lock",   cmd_lock,   0 },
    { "kvm",    cmd_kvm,    0 },
    { "test",   cmd_test,   0 },
#ifdef HAVE_LIBAUDIT_SUPPORT
    { "trace",  cmd_trace,  0 },
#endif
    { "inject", cmd_inject, 0 },
    { "mem",    cmd_mem,    0 },
    { "data",   cmd_data,   0 },
};
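
Each entry maps a sub-command name to its cmd_* handler. perf's main() dispatches through this table via handle_internal_command()/run_builtin(), the dispatch code perf borrowed from git. A simplified sketch of that dispatch, with option pre-processing and alias handling left out, looks roughly like this:

/* Simplified sketch of the dispatch in tools/perf/perf.c (details elided). */
static void handle_internal_command(int argc, const char **argv)
{
    const char *cmd = argv[0];
    unsigned int i;

    for (i = 0; i < ARRAY_SIZE(commands); i++) {
        struct cmd_struct *p = commands + i;
        if (strcmp(p->cmd, cmd))
            continue;
        /* run_builtin() sets up the pager/environment, then calls p->fn() */
        exit(run_builtin(p, argc, argv));
    }
    /* fall through: not an internal command */
}

So "perf record ..." ends up in cmd_record() and "perf stat ..." in cmd_stat(), which are the two paths analysed below.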

perf record's CALL CHAIN:

cmd_record
    ;; new a struct "record" rec, and a struct "evlist" in rec->evlist;
    perf_evlist__new
    perf_config
    __cmd_record(&record, argc, argv); // fill out "struct record" 
        perf_session__new(file, false, tool); // New a session for this rec, rec->session; note: file is "struct perf_data_file *file", i.e. &rec->file
            machines__init(&session->machines);
            ordered_events__init(&session->ordered_events, ordered_events__deliver_event);
            perf_data_file__open(file) 
                check_pipe(file)
                file->path = "perf.data" // If not specified name, fill out file->path
                open_file(file);
                    fd = perf_data_file__is_read(file) ? open_file_read(file) : open_file_write(file);
                    file->fd = fd;
            perf_session__create_kernel_maps(session) // 
        fd = perf_data_file__fd(file); // Get rec's fd, rec->file->fd
        record__init_features(rec); 
            perf_header__set_feat // Fill out session's header of this rec, rec->session->header
        record__open(rec)
            perf_evlist__config(evlist, opts); // perf_evlist
                perf_evsel__config(evsel, opts); // perf_evsel
        perf_header__clear_feat
        perf_header__write_pipe / perf_session__write_header
        perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, machine);
        perf_event__synthesize_modules(tool, process_synthesized_event, machine);
        machines__process_guests(&session->machines,perf_event__synthesize_guest_os, tool);
        __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,process_synthesized_event, opts->sample_address);
        
        
tools/perf/builtin-record.c

int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
    int err = -ENOMEM;
    struct record *rec = &record;
    char errbuf[BUFSIZ];

    rec->evlist = perf_evlist__new();
    if (rec->evlist == NULL)
        return -ENOMEM;

    perf_config(perf_record_config, rec);  // parse the config, tools/perf/util/config.c

    argc = parse_options(argc, argv, record_options, record_usage,
                PARSE_OPT_STOP_AT_NON_OPTION);
    if (!argc && target__none(&rec->opts.target))
        usage_with_options(record_usage, record_options);

    if (nr_cgroups && !rec->opts.target.system_wide) {
        ui__error("cgroup monitoring only available in"
              " system-wide mode\n");
        usage_with_options(record_usage, record_options);
    }
}
tools/perf/util/parse-events.c

setup_events // tools/perf/builtin-stat.c
    parse_events // tools/perf/util/parse-events.c
    
parse_events  // tools/perf/util/parse-events.c

int parse_events(struct perf_evlist *evlist, const char *str)
{
    struct parse_events_evlist data = {
        .list = LIST_HEAD_INIT(data.list),
        .idx  = evlist->nr_entries,
    };
    int ret;

    ret = parse_events__scanner(str, &data, PE_START_EVENTS);
    perf_pmu__parse_cleanup();
    if (!ret) {
        int entries = data.idx - evlist->nr_entries;
        perf_evlist__splice_list_tail(evlist, &data.list, entries);
        evlist->nr_groups += data.nr_groups;
        return 0;
    }

    /*
     * There are 2 users - builtin-record and builtin-test objects.
     * Both call perf_evlist__delete in case of error, so we dont
     * need to bother.
     */
    return ret;
}
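
As a usage illustration, the snippet below is hypothetical (it is not a copy from the tree, although parse_events(), pr_err() and perf_evlist__delete() are real perf helpers): after creating an evlist, the event string from -e is handed to parse_events(), which appends one perf_evsel per parsed event:

/* Hypothetical usage sketch: parse "-e cycles,instructions" into an evlist. */
struct perf_evlist *evlist = perf_evlist__new();
if (evlist == NULL)
    return -ENOMEM;

if (parse_events(evlist, "cycles,instructions")) {
    pr_err("failed to parse events\n");
    perf_evlist__delete(evlist);   /* callers delete the evlist on error */
    return -1;
}
/* evlist->nr_entries is now 2, one perf_evsel per event */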

struct introduction

tools/perf/util/target.h

struct target {
    const char   *pid;
    const char   *tid;
    const char   *cpu_list;
    const char   *uid_str;
    uid_t        uid;
    bool         system_wide;
    bool         uses_mmap;
    bool         default_per_cpu;
    bool         per_thread;
};
===

tools/perf/util/data.h

struct perf_data_file {
    const char      *path;
    int          fd;
    bool             is_pipe;
    bool             force;
    unsigned long        size;
    enum perf_data_mode  mode;
};

=== 

tools/perf/util/session.h

struct perf_session {
    struct perf_header  header;
    struct machines     machines;
    struct perf_evlist  *evlist;
    struct trace_event  tevent;
    bool            repipe;
    bool            one_mmap;
    void            *one_mmap_addr;
    u64         one_mmap_offset;
    struct ordered_events   ordered_events;
    struct perf_data_file   *file;
    struct perf_tool    *tool;
};

===

tools/perf/util/evlist.h 

struct perf_evlist {
    struct list_head entries;
    struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
    int      nr_entries;
    int      nr_groups;
    int      nr_mmaps;
    size_t       mmap_len;
    int      id_pos;
    int      is_pos;
    u64      combined_sample_type;
    struct {
        int cork_fd;
        pid_t   pid;
    } workload;
    bool         overwrite;
    struct fdarray   pollfd;
    struct perf_mmap *mmap;
    struct thread_map *threads; // threads
    struct cpu_map    *cpus;   // cpus
    struct perf_evsel *selected;
    struct events_stats stats;
};
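
evlist->entries is the list that the evlist__for_each() loops seen throughout the call chains below walk, and heads[] is a hash table used to map sample ids back to their perf_evsel. A simplified illustration of the iteration (assuming the usual kernel-style list_for_each_entry() helper; this is not the literal macro definition from evlist.h):

/* Simplified illustration of what evlist__for_each() boils down to:
 * a plain list walk over evlist->entries, one perf_evsel per node. */
struct perf_evsel *evsel;

list_for_each_entry(evsel, &evlist->entries, node) {
    /* each evsel carries its own perf_event_attr, fds and counts */
    printf("event %d: %s\n", evsel->idx, evsel->name);
}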

=== 

/** struct perf_evsel - event selector **/

Each event specified by the user maps to one perf_evsel struct.

struct perf_evsel {
    struct list_head    node;
    struct perf_event_attr  attr;
    char            *filter;
    struct xyarray      *fd;
    struct xyarray      *sample_id;
    u64         *id;
    struct perf_counts  *counts;
    struct perf_counts  *prev_raw_counts;
    int         idx;
    u32         ids;
    char            *name;
    double          scale;
    const char      *unit;
    bool            snapshot;
    struct event_format *tp_format;
    ...
    ...
    struct perf_evsel   *leader;
}

=== 

tools/perf/builtin-record.c

struct record {
    struct perf_tool    tool;
    struct record_opts  opts;
    u64         bytes_written;
    struct perf_data_file   file;
    struct perf_evlist  *evlist;
    struct perf_session *session;
    const char      *progname;
    int         realtime_prio;
    bool            no_buildid;
    bool            no_buildid_cache;
    long            samples;
};

===
An important point here: struct perf_stat holds an array of three "struct stats" (res_stats[3]), and it is initialized like this:
    for (i = 0; i < 3; i++)
        init_stats(&ps->res_stats[i]);


struct perf_stat {
    struct stats      res_stats[3];
};

tools/perf/util/stat.h

struct stats
{
    double n, mean, M2;
    u64 max, min;
};
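
The n/mean/M2 fields are the state of Welford's online algorithm for the running mean and variance. The sketch below is reconstructed from memory of tools/perf/util/stat.c, so treat it as an approximation rather than a verbatim copy; it assumes the struct stats definition above and the kernel u64 type:

/* Welford's online update, roughly what perf's update_stats() does. */
static void update_stats(struct stats *stats, u64 val)
{
    double delta;

    stats->n++;
    delta = val - stats->mean;
    stats->mean += delta / stats->n;              /* running mean */
    stats->M2 += delta * (val - stats->mean);     /* running sum of squared deviations */

    if (val > stats->max)
        stats->max = val;
    if (val < stats->min)
        stats->min = val;
}

static double avg_stats(struct stats *stats)
{
    return stats->mean;
}

/* std deviation of the mean: sample variance M2/(n-1), divided by n. */
static double stddev_stats(struct stats *stats)
{
    double variance, variance_mean;

    if (stats->n < 2)
        return 0.0;

    variance = stats->M2 / (stats->n - 1);
    variance_mean = variance / stats->n;
    return sqrt(variance_mean);
}

This is what lets perf stat print "+- x.xx%" noise figures when a run is repeated with -r.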

==== 
tools/perf/util/evsel.h

struct perf_counts_values {
    union {
        struct {
            u64 val;
            u64 ena;
            u64 run;
        };
        u64 values[3];
    };
};

struct perf_counts {
    s8            scaled;
    struct perf_counts_values aggr;
    struct perf_counts_values cpu[];
};
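
The three u64 values mirror what the kernel returns from read() on an event fd when the event was opened with read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING: the raw count, the time the event was enabled, and the time it was actually scheduled on the PMU. A hedged illustration of reading one fd directly into this layout (read_one_counter is a made-up helper name, not a perf function):

/* Illustrative sketch: read one counter fd into a perf_counts_values.
 * Assumes the attr's read_format requested the enabled/running times,
 * so the kernel returns exactly 3 u64s. */
static int read_one_counter(int fd, struct perf_counts_values *count)
{
    memset(count, 0, sizeof(*count));
    if (read(fd, count->values, sizeof(count->values)) < 0)
        return -1;
    /* count->val is the raw count; count->ena / count->run are the
     * enabled/running times used later for multiplexing scaling. */
    return 0;
}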


perf stat's CALL CHAIN:
commands // tools/perf/perf.c
    cmd_stat // tools/perf/builtin-stat.c
        parse_events_option // if "perf stat -e xxx" specified an event name, it is validated here
            parse_events
                parse_events__scanner // check events 
                    parse_events_lex_init_extra
                    parse_events__scan_string
                    parse_events_parse
                    parse_events__flush_buffer
                    parse_events__delete_buffer
                    parse_events_lex_destroy
                perf_pmu__parse_cleanup:
        perf_evlist__new();
            perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, struct thread_map *threads) // evlist->cpus, evlist->threads
                perf_evlist__set_maps /// 
        parse_options
        parse_options_usage
        add_default_attributes()
        target__validate(&target);
        perf_evlist__create_maps(evsel_list, &target) // fill out evlist->threads(thread_map)
            evlist->threads = thread_map__new_str(target->pid, target->tid,target->uid); // evlist->threads
            evlist->threads(thread_map) = [tid,tid,tid,tid,...]
            target__uses_dummy_map(target) 
                evlist->cpus = cpu_map__dummy_new() // evlist->cpus
                evlist->cpus = cpu_map__new(target->cpu_list)
        perf_evlist__alloc_stats(evsel_list, interval)  // Traverse all evsel
            evlist__for_each(evlist, evsel) {
                perf_evsel__alloc_stat_priv(evsel) // Alloc memory for each evsel->priv = zalloc(sizeof(struct perf_stat));
                    perf_evsel__reset_stat_priv(evsel)
                        init_stats // Fill out "struct perf_stat", perf_stat include 3 elements of "struct stats{}"
                perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) //  Alloc evsel->counts
                alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) // Alloc evsel->prev_raw_counts =  addr;
            }
        perf_stat_init_aggr_mode()
            cpu_map__build_socket_map
                cpu_map__build_map(cpus, sockp, cpu_map__get_socket);
                cpu_map__get_socket
            cpu_map__build_core_map
                cpu_map__build_map(cpus, corep, cpu_map__get_core);
                cpu_map__get_core
                    cpu_map__get_socket
        
        run_perf_stat(argc, argv);
            __run_perf_stat(argc, argv);
                perf_evlist__prepare_workload(evsel_list, &target, argv, false, workload_exec_failed_signal)
                perf_evlist__set_leader(evsel_list); // evlist->nr_groups is 1 or 0, depending on whether evlist->nr_entries > 1
                    __perf_evlist__set_leader(&evlist->entries);
                    evlist__for_each(evsel_list, evsel) {  // Traverse all evsel
                        create_perf_stat_counter(evsel)
                            struct perf_event_attr *attr = &evsel->attr;
                            attr->xxx  = xxx
                            perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)
                            perf_evsel__is_group_leader(evsel)
                            perf_evsel__open_per_thread(evsel, evsel_list->threads)
                                // important: __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads)
                                __perf_evsel__open(evsel, &empty_cpu_map.map, threads) 
                                    // perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads), if system_wide: nthreads = 1
                                    perf_evsel__alloc_fd(evsel, cpus->nr, nthreads)
                                        evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
                                    for (cpu = 0; cpu < cpus->nr; cpu++) {
                                         for (thread = 0; thread < nthreads; thread++) { 
                                            group_fd = get_group_fd(evsel, cpu, thread);
                                            sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu], group_fd, flags);
                                         }
                                    }           
                    }
                    perf_evlist__apply_filters(evsel_list, &counter)
                    evlist__for_each(evlist, evsel) {
                        perf_evsel__set_filter(evsel, ncpus, nthreads, evsel->filter);
                    }
                    t0 = rdclock();
                    clock_gettime(CLOCK_MONOTONIC, &ref_time);
                    if (forks) { 
                        perf_evlist__start_workload(evsel_list);
                        handle_initial_delay();
                        if (interval) {
                            print_interval();
                        }
                    } else {
                        handle_initial_delay();
                        print_interval();
                    }
                    t1 = rdclock();

                    update_stats(&walltime_nsecs_stats, t1 - t0);

                    // start reading the counts for each evsel
                    if (aggr_mode == AGGR_GLOBAL) {
                        evlist__for_each(evsel_list, counter) {
                            // read into a "struct perf_counts_values", stored in the evsel's &counter->counts->aggr (here the evsel is the counter)
                            // plus the "struct perf_stat" hanging off counter->priv
                            read_counter_aggr(counter); 
                                aggr->val = aggr->ena = aggr->run = 0; // reset the perf_counts_values aggr to zero first
                                read_counter(counter)  // how to read this event: iterate over every thread and cpu
                                    int nthreads = thread_map__nr(evsel_list->threads);
                                    int ncpus = perf_evsel__nr_cpus(counter);
                                    int cpu, thread;
                                    for (thread = 0; thread < nthreads; thread++) {
                                        for (cpu = 0; cpu < ncpus; cpu++) {
                                            // read thread x cpu as a two-dimensional array, into "struct perf_counts_values count"
                                            process_per_cpu(struct perf_evsel *evsel, int cpu, int thread)
                                                perf_evsel__read_cb(evsel, cpu, thread, &count)
                                                    memset(count, 0, sizeof(*count));
                                                    FD(evsel, cpu, thread)
                                                    readn(FD(evsel, cpu, thread), count, sizeof(*count))
                                                        ion(true, fd, buf, n);
                                                            read(fd, buf, left)
                                                            
                                                read_cb(evsel, cpu, thread, tmp);
                                                    switch (aggr_mode) {
                                                        case AGGR_CORE:
                                                        case AGGR_SOCKET:
                                                        case AGGR_NONE:
                                                        perf_evsel__compute_deltas(evsel, cpu, count);
                                                        perf_counts_values__scale(count, scale, NULL);
                                                        update_shadow_stats(evsel, count->values, cpu);
                                                    
                                                    }
                                        }
                                    }   
                            perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), thread_map__nr(evsel_list->threads));
                        }
                    } else {
                        evlist__for_each(evsel_list, counter) {
                            read_counter(counter);
                            perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
                        }
                    }
            
        print_stat 
            print_aggr // AGGR_CORE AGGR_SOCKET
            print_counter_aggr(evsel, NULL); // AGGR_GLOBAL
            print_counter(evsel, NULL) // AGGR_NONE
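
When an event was multiplexed (it ran for less time than it was enabled), the raw count is scaled up before being reported. The sketch below approximates what perf_counts_values__scale() does (scale_counter is a made-up name and the body is reconstructed from memory, so treat the details as an approximation):

/* Approximate sketch of the multiplexing scaling: if the event only ran
 * for part of the time it was enabled, extrapolate the count to the
 * full window; if it never ran, report 0 and mark it unscalable. */
static void scale_counter(struct perf_counts_values *count, bool scale, s8 *pscaled)
{
    s8 scaled = 0;

    if (scale) {
        if (count->run == 0) {                  /* never scheduled on the PMU */
            scaled = -1;
            count->val = 0;
        } else if (count->run < count->ena) {   /* partially scheduled */
            scaled = 1;
            count->val = (u64)((double)count->val * count->ena / count->run + 0.5);
        }
    } else {
        count->ena = count->run = 0;
    }

    if (pscaled)
        *pscaled = scaled;
}

This is why perf stat output sometimes shows a "[xx.xx%]" figure next to a count: it is the run/ena ratio of the counter.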

Reposted from: https://www.cnblogs.com/muahao/p/8933384.html

Introduction to google-perftools

google-perftools is a performance analysis tool for C/C++ programs, released as an open-source project under the BSD license. It can analyze the allocation and use of system resources such as CPU time and memory; this section focuses on CPU time profiling.

Profiling the CPU performance of a program with google-perftools involves the following steps:

1. Build the target program with a dependency on the google-perftools library.
2. Run the target program and start/stop the profiler functions in some way so that profiling results are produced.
3. Run the result-conversion tool to turn the raw, unreadable result data into a document in some readable format (e.g. pdf, txt, gv).

Installation

The latest release can be downloaded from the google-perftools site (http://code.google.com/p/google-perftools/downloads/list). For step 3 you also need a tool that turns the profiling result into a document readable by programmers, such as gv (http://www.gnu.org/software/gv/).

Compiling and running

You need to add a reference to libprofiler.so to your existing build options so that the tool's shared library is loaded when the target program runs. On the author's system, for example, libprofiler.so is installed under /usr/lib, so "-L/usr/lib -lprofiler" has to be added to the compile options in the makefile.

google-perftools needs the profiler's start and stop functions to be called at the start and end points of the target code, so that the CPU time actually consumed by the program during that window can be measured and analyzed. The profiler can be started and stopped in two ways.

a. Use the debugger gdb to invoke the profiler's start/stop functions manually while the program runs. gdb is the widely used debugger on Linux; it offers a powerful command line that lets us insert breakpoints while the program runs and execute other functions at those breakpoints. See http://www.gnu.org/software/gdb/ for the full documentation; only the few basic features used here are introduced, and the following commands are enough for our profiling needs (see the example below for concrete usage):

    Command   Function
    ctrl+c    pause the running program
    c         continue execution
    b         set a breakpoint (the argument can be a source line number or a function name)
    p         print the value of an expression, or execute a function call

b. Call the profiler functions directly from the target code, i.e. add the profiling calls to the program source itself.

Both methods require rebuilding the target program with the profiler library dependency. The first is more flexible, but starting and stopping the tool depend on manual operation by the programmer and often need some pausing function (such as sleep) to control the program, so the precision may suffer. The second requires modifying the target code and dealing with things like function declarations, but the results are more precise; its drawback is that changing the start point means recompiling every time, so it is less flexible. Choose whichever fits your actual needs.

Worked example

The example program is deliberately simple: it contains two time-consuming useless operations, and there is a call relationship between them.

Listing 1. Example program

void consumeSomeCPUTime1(int input){
    int i = 0;
    input++;
    while(i++ < 10000){
        i--; i++; i--; i++;
    }
};

void consumeSomeCPUTime2(int input){
    input++;
    consumeSomeCPUTime1(input);
    int i = 0;
    while(i++ < 10000){
        i--; i++; i--; i++;
    }
};

int stupidComputing(int a, int b){
    int i = 0;
    while( i++ < 10000){
        consumeSomeCPUTime1(i);
    }
    int j = 0;
    while(j++ < 5000){
        consumeSomeCPUTime2(j);
    }
    return a+b;
};

int smartComputing(int a, int b){
    return a+b;
};

void main(){
    int i = 0;
    printf("reached the start point of performance bottle neck\n");
    sleep(5);
    //ProfilerStart("CPUProfile");
    while( i++ ...

The result document produced by the conversion (MyProfile.pdf) is shown in the figure below. The numbers and the size of each box represent the share of the total profiled time spent in a given function. From the logic of the code, stupidComputing and stupidComputing2 are both time-consuming operations and they have a call relationship with consumeSomeCPUTime.

Figure 1. Profiling result

Closing remarks

This article introduced google-perftools, a performance profiling tool for the Linux platform, and showed with an example how to configure and use it and how to analyze performance bottlenecks.
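
For reference, here is a minimal sketch of the second approach, calling the profiler directly. ProfilerStart()/ProfilerStop() are the real gperftools API from <gperftools/profiler.h>; the file name "CPUProfile" and the busy_work() function are just made up for the example:

#include <gperftools/profiler.h>   /* on older installs the header is <google/profiler.h> */

static void busy_work(void)
{
    volatile long sink = 0;
    for (long i = 0; i < 100000000L; i++)
        sink += i;                 /* something for the profiler to attribute time to */
}

int main(void)
{
    ProfilerStart("CPUProfile");   /* write samples to ./CPUProfile */
    busy_work();
    ProfilerStop();                /* stop profiling and flush the output file */
    return 0;
}

Built with something like "gcc -o demo demo.c -lprofiler", the CPUProfile output can then be turned into a readable report with gperftools' pprof script (for example "pprof --text ./demo CPUProfile", or --pdf for a call graph like Figure 1); the exact flags here are given only as a plausible example, check your pprof version's help.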