使用simpleperf跟踪自定义的perf events

目录

前言

tracepoint简介

添加tracepoint:

使用simpleperf 跟踪自定义的tracepoint


前言

  1. simpleperf可以跟踪指定的perf events,通过adb shell simpleperf list可以查看当前设备支持的所有perf event。
  2. 但是如果这些perf event不能满足我们的需求,这时候需要自定义我们需要的event。

tracepoint简介

tracepoint 是内核提供的tracing机制,可以通过打开和关闭来probe相应的函数,且对内核影响很小。tracepoint目前工作在ftrace框架内,使用ring buffer传输perf数据。对开发者来说,tracepoint框架提供了现成的宏来帮助开发者定义和增加tracepoints。

我们直接看下系统现成的tracepoint是怎么定义的:

/*
 * Tracepoint definition for the "sched_switch" event.
 *
 * The probe takes the run queue plus the outgoing (prev) and incoming (next)
 * tasks. Each record stores the comm, pid and prio of both tasks, and the
 * state of prev.
 */
TRACE_EVENT(sched_switch,
 
 /* Prototype of the generated trace_sched_switch() call. */
 TP_PROTO(struct rq *rq, struct task_struct *prev,
      struct task_struct *next),
 
 /* Argument names, matching TP_PROTO. */
 TP_ARGS(rq, prev, next),
 
 /* Layout of one record written to the ring buffer. */
 TP_STRUCT__entry(
     __array(    char,   prev_comm,  TASK_COMM_LEN   )
     __field(    pid_t,  prev_pid            )
     __field(    int,    prev_prio           )
     __field(    long,   prev_state          )
     __array(    char,   next_comm,  TASK_COMM_LEN   )
     __field(    pid_t,  next_pid            )
     __field(    int,    next_prio           )
 ),
 
 /* Fill the record from prev/next; rq is not stored. */
 TP_fast_assign(
     memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
     __entry->prev_pid    = prev->pid;
     __entry->prev_prio   = prev->prio;
     __entry->prev_state  = prev->state;
     memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
     __entry->next_pid    = next->pid;
     __entry->next_prio   = next->prio;
 ),
 
 /*
  * Text form of the record. prev_state is printed as "R" when it is zero,
  * otherwise as the letters of the set flag bits joined with "|".
  */
 TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d",
     __entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
     __entry->prev_state ?
       __print_flags(__entry->prev_state, "|",
             { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
             { 16, "Z" }, { 32, "X" }, { 64, "x" },
             { 128, "W" }) : "R",
     __entry->next_comm, __entry->next_pid, __entry->next_prio)
);
TRACE_EVENT 宏总共由6部分组成:
  1. name: 自定义的tracepoint名称。使用此tracepoint时,在名称前加上trace_前缀,即以trace_<name>()的形式在需要追踪的函数中调用(例如trace_sched_switch())。
  2. TP_PROTO: 函数原型,指定参数类型
  3. TP_ARGS: 函数具体参数(参数名,与TP_PROTO中的一致)
  4. TP_STRUCT__entry: 需要传输到ring buffer的数据结构体
  5. TP_fast_assign: 给TP_STRUCT__entry定义的结构体赋值,括号内是标准c代码,其中__entry是对结构体的引用
  6. TP_printk: 将__entry结构体中的数据输出

一旦像上面定义好tracepoint,就可以在具体的函数中添加trace:

/*
 * Excerpt from the scheduler's task-switch path, showing where the
 * sched_switch tracepoint is invoked.
 *
 * NOTE(review): the final argument of each update_task_ravg() call was empty
 * in the excerpt ("wallclock, )"), which does not compile. It is restored as
 * 0 here, presumably matching the msm-4.14 tree; confirm against your kernel
 * source.
 */
if (likely(prev != next)) {
    /* prev has left the runqueue: remember when it went to sleep. */
    if (!prev->on_rq)
        prev->last_sleep_ts = wallclock;

    update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
    update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
    rq->nr_switches++;
    rq->curr = next;
    ++*switch_count;

    /*
     * Emit the event by calling trace_<event name>().
     * NOTE(review): this call passes `preempt` as the first argument, while
     * the TRACE_EVENT(sched_switch) definition shown in this article declares
     * `struct rq *rq` first; the two excerpts appear to come from different
     * kernel versions.
     */
    trace_sched_switch(preempt, prev, next);

    /* Also unlocks the rq: */
    rq = context_switch(rq, prev, next, &rf);
} else {
    update_task_ravg(prev, rq, TASK_UPDATE, wallclock, 0);
    rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
    rq_unlock_irq(rq, &rf);
}

如果同类型的tracepoint即参数相同,但是使用场景稍有区分,可以使用 DECLARE_EVENT_CLASS 来统一管理。DECLARE_EVENT_CLASS和TRACE_EVENT拥有一模一样的6个部分,不同的是DECLARE_EVENT_CLASS里声明的name是class name;当定义具体的event的时候使用DEFINE_EVENT宏,第一个参数先带上这个class name。

比如我们已经按照上面TRACE_EVENT的形式定义了一个名为sched_wakeup_template 的event class,后续的define event可以用如下方式定义:

// sched_wakeup_template is the event class name declared with DECLARE_EVENT_CLASS.
// sched_wakeup is the name of the concrete event.
DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
                TP_PROTO(struct rq *rq, struct task_struct *p, int success),
                TP_ARGS(rq, p, success));
 
 
// sched_wakeup_template is the event class name declared with DECLARE_EVENT_CLASS.
// sched_wakeup_new is the name of the concrete event; it reuses the same
// prototype and arguments as sched_wakeup.
DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
                TP_PROTO(struct rq *rq, struct task_struct *p, int success),
                TP_ARGS(rq, p, success));
通过上面的方式,先定义个event class作为模板,然后定义不同的event就达成了同类型trace event的快速定义。

添加tracepoint:

有了上面的简介铺垫,现在我们来实际添加一个tracepoint,这里的tracepoint添加在ion子系统中,因为ion定义的perf events没有memory alloc相关的事件,而这个事件在我们追踪ion 内存分配和管理时非常关注,所以以此为例。

首先看下ion alloc的相关方法,总共有三个:

// Signatures of the three ion allocation entry points. All three take the
// same parameter list (len, heap_id_mask, flags); only the return type differs.
struct dma_buf *ion_alloc(size_t len, unsigned int heap_id_mask,
              unsigned int flags)
 
 
int ion_alloc_fd(size_t len, unsigned int heap_id_mask, unsigned int flags)
 
 
struct dma_buf *ion_alloc_dmabuf(size_t len, unsigned int heap_id_mask,
                 unsigned int flags)

观察下三个方法的参数类型完全一致,于是我们在其现有的ion的trace header file中添加如下trace event class 和event:

注:ion的trace header file路径在:/sources/toco/kernel/msm-4.14/include/trace/events/ion.h

/*
 * Event class shared by the ion allocation tracepoints.
 *
 * Every event of this class takes the allocation request parameters
 * (len, heap_id_mask, flags) and stores all three in its record.
 */
DECLARE_EVENT_CLASS(ion_alloc_mem_class,
    TP_PROTO(size_t len, unsigned int heap_id_mask,
              unsigned int flags),
    TP_ARGS(len, heap_id_mask, flags),
    /* Layout of one record written to the ring buffer. */
    TP_STRUCT__entry(
        __field(size_t, len)
        __field(unsigned int, heap_id_mask)
        __field(unsigned int, flags)
    ),
    TP_fast_assign(
        __entry->len = len;
        __entry->heap_id_mask = heap_id_mask;
        __entry->flags = flags;
    ),

    /*
     * No trailing "\n" in the format: the trace output already terminates
     * each record, so a newline here would produce blank lines.
     */
    TP_printk("len %zu heap_id_mask %u flags %x",
        __entry->len,
        __entry->heap_id_mask,
        __entry->flags)

);
 
// Define three concrete trace events from ion_alloc_mem_class.
// ion_alloc: generates trace_ion_alloc(len, heap_id_mask, flags).
DEFINE_EVENT(ion_alloc_mem_class, ion_alloc,
    TP_PROTO(size_t len, unsigned int heap_id_mask,
              unsigned int flags),
 
    TP_ARGS(len, heap_id_mask, flags)
);
 
// ion_alloc_fd: second event of ion_alloc_mem_class, with the same
// prototype and arguments as the class.
DEFINE_EVENT(ion_alloc_mem_class, ion_alloc_fd,
    TP_PROTO(size_t len, unsigned int heap_id_mask,
              unsigned int flags),
 
    TP_ARGS(len, heap_id_mask, flags)
);
 
// ion_alloc_dmabuf: third event of ion_alloc_mem_class, with the same
// prototype and arguments as the class.
DEFINE_EVENT(ion_alloc_mem_class, ion_alloc_dmabuf,
    TP_PROTO(size_t len, unsigned int heap_id_mask,
              unsigned int flags),
 
    TP_ARGS(len, heap_id_mask, flags)
);

然后在具体的代码中添加trace,因为ion_alloc和ion_alloc_fd都会最终调用到ion_alloc_dmabuf中,

我们直接在ion_alloc_dmabuf中添加相应的trace:

/*
 * Allocate an ion buffer and export it as a dma_buf.
 *
 * len is rounded up with PAGE_ALIGN(). The heaps of the ion device are walked
 * in list order; heaps whose id bit is not set in heap_id_mask are skipped,
 * and the walk stops at the first heap that either creates the buffer or
 * fails with -EINTR.
 *
 * Returns the result of dma_buf_export() (which may itself be an error
 * pointer), or:
 *   ERR_PTR(-EINVAL) if the aligned length is zero,
 *   ERR_PTR(-ENODEV) if no heap was tried (buffer is still NULL),
 *   the error from ion_buffer_create() if the last attempted heap failed.
 */
struct dma_buf *ion_alloc_dmabuf(size_t len, unsigned int heap_id_mask,
                 unsigned int flags)
{
    struct ion_device *dev = internal_dev;
    struct ion_buffer *buffer = NULL;
    struct ion_heap *heap;
    DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
    struct dma_buf *dmabuf;
    char task_comm[TASK_COMM_LEN];
 
 
    pr_debug("%s: len %zu heap_id_mask %u flags %x\n", __func__,
         len, heap_id_mask, flags);
 
    // Newly added tracepoint. It fires before PAGE_ALIGN() and before any
    // validation, so it records the caller-supplied len, not the aligned one.
    trace_ion_alloc_dmabuf(len, heap_id_mask, flags);
 
    len = PAGE_ALIGN(len);
 
    if (!len)
        return ERR_PTR(-EINVAL);
 
    /* The heap list is walked under the device lock, taken for reading. */
    down_read(&dev->lock);
    plist_for_each_entry(heap, &dev->heaps, node) {
        /* if the caller didn't specify this heap id */
        if (!((1 << heap->id) & heap_id_mask))
            continue;
        buffer = ion_buffer_create(heap, dev, len, flags);
        /* Stop on success, or on -EINTR instead of trying further heaps. */
        if (!IS_ERR(buffer) || PTR_ERR(buffer) == -EINTR)
            break;
    }
    up_read(&dev->lock);
 
    /* buffer is only assigned inside the loop: NULL means no heap matched. */
    if (!buffer)
        return ERR_PTR(-ENODEV);
 
    if (IS_ERR(buffer))
        return ERR_CAST(buffer);
 
    /* comm of the caller's thread-group leader, used in the export name. */
    get_task_comm(task_comm, current->group_leader);
 
    exp_info.ops = &dma_buf_ops;
    exp_info.size = buffer->size;
    exp_info.flags = O_RDWR;
    exp_info.priv = buffer;
    /* Export name: "<module>-<heap name>-<tgid>-<leader comm>". */
    exp_info.exp_name = kasprintf(GFP_KERNEL, "%s-%s-%d-%s", KBUILD_MODNAME,
                      heap->name, current->tgid, task_comm);
 
    dmabuf = dma_buf_export(&exp_info);
    if (IS_ERR(dmabuf)) {
        /* Export failed: release the buffer and the allocated name. */
        _ion_buffer_destroy(buffer);
        kfree(exp_info.exp_name);
    }
 
    return dmabuf;
}

至此我们完成了自定义添加的tracepoint

使用simpleperf 跟踪自定义的tracepoint

添加完自定义的tracepoint也就是perf event后,我们需要用simpleperf来追踪这个事件。

重新编译kernel烧写后,使用simpleperf list | grep ion: 来检查添加的tracepoint是否已经成功:

从上图可以看出我们增加的三个tracepoint已经添加成功

然后使用如下命令record camera provider,并且对ion_alloc_dmabuf进行过滤:


adb shell
 
 
simpleperf record -p 764 -e 'ion:ion_alloc_dmabuf' -o /sdcard/perf.data --call-graph dwarf
 
 
按ctrl-c结束record,有以下输出时,说明成功记录相应perf 数据:
simpleperf I cmd_record.cpp:635] Samples recorded: 313. Samples lost: 0.

将 perf.data pull出来之后进行simpleperf report:

这里使用simpleperf的report_sample脚本加上 --show_tracing_data参数将输出到ring buffer的数据同样打印出来。

最终得到的trace结果如下:

provider@2.4-se   1781 [007] 237.483212:        1 ion:ion_alloc_dmabuf:
ffffff870930a588  ion_alloc_dmabuf ([kernel.kallsyms])
ffffff870930a586  ion_alloc_dmabuf ([kernel.kallsyms])
ffffff870930aa9a  ion_alloc ([kernel.kallsyms])
ffffff87091575aa  cam_mem_mgr_alloc_and_map ([kernel.kallsyms])
ffffff8709155156  cam_private_ioctl ([kernel.kallsyms])
ffffff8709098d46  __video_do_ioctl ([kernel.kallsyms])
ffffff8709098806  video_usercopy ([kernel.kallsyms])
ffffff8709098b2a  video_ioctl2 ([kernel.kallsyms])
ffffff8709097f7a  v4l2_ioctl ([kernel.kallsyms])
ffffff870888387e  do_vfs_ioctl ([kernel.kallsyms])
ffffff8708883c56  sys_ioctl ([kernel.kallsyms])
ffffff870868487e  el0_svc_naked ([kernel.kallsyms])
      7db7ed91c8  __ioctl (/apex/com.android.runtime/lib64/bionic/libc.so)
      7db7e937ec  ioctl (/apex/com.android.runtime/lib64/bionic/libc.so)
      7db327b98c  CSLHwInternalDefaultIoctl2(CSLHwDevice const*, unsigned int, void*, unsigned int, unsigned int) (/vendor/lib64/hw/camera.qcom.so)
      7db3270f84  CSLAllocHW(char const*, CSLBufferInfo*, unsigned long, unsigned long, unsigned int, int const*, unsigned int) (/vendor/lib64/hw/camera.qcom.so)
      7db3268f60  CSLAlloc (/vendor/lib64/hw/camera.qcom.so)
      7db31ffd04  CamX::MemPoolGroup::AllocateBuffers(unsigned int) (/vendor/lib64/hw/camera.qcom.so)
      7db32026ac  CamX::MemPoolGroup::GetBufferFromPool(CamX::MemPoolBufferManager*, CSLBufferInfo*, CamX::NativeHandle const**) (/vendor/lib64/hw/camera.qcom.so)
      7db31fbef4  CamX::MemPoolMgr::GetBufferFromPool(void*, CSLBufferInfo*, CamX::NativeHandle const**) (/vendor/lib64/hw/camera.qcom.so)
      7db31dd810  CamX::ImageBuffer::Allocate() (/vendor/lib64/hw/camera.qcom.so)
      7db31e3808  CamX::ImageBufferManager::BindBufferManagerImageBuffer(CamX::ImageBuffer*) (/vendor/lib64/hw/camera.qcom.so)
      7db31dea50  CamX::ImageBuffer::BindBuffer() (/vendor/lib64/hw/camera.qcom.so)
      7db321ea10  CamX::Node::BindInputOutputBuffers(CamX::PerRequestActivePorts const*, int, int) (/vendor/lib64/hw/camera.qcom.so)
      7db321d760  CamX::Node::ProcessRequest(CamX::NodeProcessRequestData*, unsigned long) (/vendor/lib64/hw/camera.qcom.so)
      7db31ce534  CamX::DeferredRequestQueue::DeferredWorkerCore(CamX::Dependency*) (/vendor/lib64/hw/camera.qcom.so)
      7db31ce120  CamX::DeferredRequestQueue::DeferredWorkerWrapper(void*) (/vendor/lib64/hw/camera.qcom.so)
      7db31497a8  CamX::ThreadCore::DispatchJob(CamX::RuntimeJob*) (/vendor/lib64/hw/camera.qcom.so)
      7db314a078  CamX::ThreadCore::ProcessJobQueue() (/vendor/lib64/hw/camera.qcom.so)
      7db31499d0  CamX::ThreadCore::DoWork() (/vendor/lib64/hw/camera.qcom.so)
      7db314988c  CamX::ThreadCore::WorkerThreadBody(void*) (/vendor/lib64/hw/camera.qcom.so)
      7db7eee9d0  __pthread_start(void*) (/apex/com.android.runtime/lib64/bionic/libc.so)
      7db7e8cb6c  __start_thread (/apex/com.android.runtime/lib64/bionic/libc.so)
      tracing data:
            common_type : 971
            common_flags : 0
            common_preempt_count : 1
            common_pid : 1781
            len : 24081792
            heap_id_mask : 34603008
            flags : 1
 
 
 
 
HwBinder:764_2    11526 [002] 235.334226:       1 ion:ion_alloc_dmabuf:
ffffff870930a588  ion_alloc_dmabuf ([kernel.kallsyms])
ffffff870930a586  ion_alloc_dmabuf ([kernel.kallsyms])
ffffff870930aa9a  ion_alloc ([kernel.kallsyms])
ffffff87091575aa  cam_mem_mgr_alloc_and_map ([kernel.kallsyms])
ffffff8709155156  cam_private_ioctl ([kernel.kallsyms])
ffffff8709098d46  __video_do_ioctl ([kernel.kallsyms])
ffffff8709098806  video_usercopy ([kernel.kallsyms])
ffffff8709098b2a  video_ioctl2 ([kernel.kallsyms])
ffffff8709097f7a  v4l2_ioctl ([kernel.kallsyms])
ffffff870888387e  do_vfs_ioctl ([kernel.kallsyms])
ffffff8708883c56  sys_ioctl ([kernel.kallsyms])
ffffff870868487e  el0_svc_naked ([kernel.kallsyms])
      7db7ed91c8  __ioctl (/apex/com.android.runtime/lib64/bionic/libc.so)
      7db7e937ec  ioctl (/apex/com.android.runtime/lib64/bionic/libc.so)
      7db327b98c  CSLHwInternalDefaultIoctl2(CSLHwDevice const*, unsigned int, void*, unsigned int, unsigned int) (/vendor/lib64/hw/camera.qcom.so)
      7db3270f84  CSLAllocHW(char const*, CSLBufferInfo*, unsigned long, unsigned long, unsigned int, int const*, unsigned int) (/vendor/lib64/hw/camera.qcom.so)
      7db3268f60  CSLAlloc (/vendor/lib64/hw/camera.qcom.so)
      7db31cb85c  CamX::CmdBufferManager::InitializePool() (/vendor/lib64/hw/camera.qcom.so)
      7db31ca8a4  CamX::CmdBufferManager::Initialize(char const*, CamX::ResourceParams const*) (/vendor/lib64/hw/camera.qcom.so)
      7db31ca4a4  CamX::CmdBufferManager::CreateParentManager(CamX::CmdBufferManagerParam*, unsigned int) (/vendor/lib64/hw/camera.qcom.so)
      7db31cab20  CamX::CmdBufferManager::CreateMultiManager(CamX::CmdBufferManagerParam*, unsigned int) (/vendor/lib64/hw/camera.qcom.so)
      7db3226198  CamX::Node::CreateMultiCmdBufferManager(CamX::CmdBufferManagerParam*, unsigned int) (/vendor/lib64/hw/camera.qcom.so)
      7db2fb0ae4  CamX::IPENode::CreateFWCommandBufferManagers() (/vendor/lib64/hw/camera.qcom.so)
      7db2faf864  CamX::IPENode::InitializeStripingParams(_IpeConfigIoData*) (/vendor/lib64/hw/camera.qcom.so)
      7db2fae858  CamX::IPENode::SetupDeviceResource(CSLBufferInfo*, CSLDeviceResource*) (/vendor/lib64/hw/camera.qcom.so)
      7db2f9fbfc  CamX::IPENode::AcquireDevice() (/vendor/lib64/hw/camera.qcom.so)
      7db2f9ecfc  CamX::IPENode::PostPipelineCreate() (/vendor/lib64/hw/camera.qcom.so)
      7db3229894  CamX::Node::NotifyPipelineCreated() (/vendor/lib64/hw/camera.qcom.so)
      7db323cdbc  CamX::Pipeline::FinalizePipeline(CamX::FinalizeInitializationData*) (/vendor/lib64/hw/camera.qcom.so)
      7db3244908  CamX::Session::FinalizeDeferPipeline(unsigned int) (/vendor/lib64/hw/camera.qcom.so)
      7db3248324  CamX::Session::StreamOn(void*) (/vendor/lib64/hw/camera.qcom.so)
      7d8f464cb4  FeatureMFNR::ActivateOfflinePipeline(FeatureMFNR::MFNRStage) (/vendor/lib64/hw/com.qti.chi.override.so)
      7d8f468e30  FeatureMFNR::ExecuteMfnrRequest(FeatureMFNR::MFNRStage, unsigned int, unsigned int, ChiStreamBuffer*, unsigned int, ChiStreamBuffer*, ChiMetadata*, ChiMetadata*) (/vendor/lib64/hw/com.qti.chi.override.so)
      7d8f467b3c  FeatureMFNR::SubmitOfflinePostfilterStageRequest(unsigned int, camera3_capture_request*, FeatureMFNR::MFNRInputInfo*) (/vendor/lib64/hw/com.qti.chi.override.so)
      7d8f46582c  FeatureMFNR::SubmitOfflineMfnrRequest(unsigned int, camera3_capture_request*) (/vendor/lib64/hw/com.qti.chi.override.so)
      7d8f46dc3c  FeatureMFNR::RequestThreadProcessing() (/vendor/lib64/hw/com.qti.chi.override.so)
      7d8f45c924  FeatureMFNR::RequestThread(void*) (/vendor/lib64/hw/com.qti.chi.override.so)
      7db7eee9d0  __pthread_start(void*) (/apex/com.android.runtime/lib64/bionic/libc.so)
      7db7e8cb6c  __start_thread (/apex/com.android.runtime/lib64/bionic/libc.so)
      tracing data:
            common_type : 971
            common_flags : 0
            common_preempt_count : 1
            common_pid : 11526
            len : 3473408
            heap_id_mask : 34603008
            flags : 0

从用户空间到内核的ion alloc调用一目了然,可以帮助我们了解系统和定位问题。

如果使用simpleperf的report_html脚本: python report_html.py -i perf.data -o iontrace.html,可以得到类似下图的结果:

这样会更加的直观。

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

repinkply

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值