Android性能分析工具perfetto (2)pefetto原理简析

最新推荐文章于 2024-06-30 02:35:47 发布

wellt606

最新推荐文章于 2024-06-30 02:35:47 发布

阅读量1.2k

点赞数

文章标签： android linux

本文链接：https://blog.csdn.net/wellt606/article/details/133688939

版权

1. 基本框架

perfetto性能维测框架简化后，如下图所示。内核的ring buffer缓冲区，用于保存埋点数据；用户程序通过写debugfs对应节点/sys/kernel/debug/tracing/trace_marker，将用户空间埋点数据写到内核ring buffer缓冲区中；内核相关的信息，由trace event桩函数获取；perfetto相关程序通过读取节点/sys/kernel/debug/tracing/per_cpu/cpu[0-7]/trace，从ring buffer中读取数据。perfetto抓取的trace数据在网页https://ui.perfetto.dev/上实现可视化。

2. 函数运行时长

使用perfetto分析性能问题时，查看某个流程中函数运行时长，是寻找阻塞点的常用方法。函数运行时长是怎么来的呢？以SurfaceFlinger的onMessageReceived为例，进入函数时调用宏 ATRACE_CALL()。ATRACE_CALL()-->ATRACE_NAME(__FUNCTION__)以函数名作为参数，new一个EglScopedTrace类型的局部变量，在EglScopedTrace的构造函数中调用atrace_begin()。atrace_begin()-->atrace_begin_body()调用WRITE_MSG()，将字符串"B|pid|函数名"通过节点/sys/kernel/debug/tracing/trace_marker写入内核ring buffer。

//SurfaceFlinger::onMessageReceived调用ATRACE_CALL()
void SurfaceFlinger::onMessageReceived(int32_t what) {
    ATRACE_CALL();

#define _PASTE(x, y) x ## y
#define PASTE(x, y) _PASTE(x,y)
//定义局部变量
#define ATRACE_NAME(name) android::EglScopedTrace PASTE(___tracer, __LINE__) (ATRACE_TAG, name)
#define ATRACE_CALL() ATRACE_NAME(__FUNCTION__)

class EglScopedTrace {
public:
    //构造函数
    inline EglScopedTrace(uint64_t tag, const char* name) : mTag(tag) {
        atrace_begin(mTag, name);
    }
    //析构函数
    inline ~EglScopedTrace() {
        atrace_end(mTag);
    }

private:
    uint64_t mTag;
};
static inline void atrace_begin(uint64_t tag, const char* name)
{
    if (CC_UNLIKELY(atrace_is_tag_enabled(tag))) {
        void atrace_begin_body(const char*);
        atrace_begin_body(name);
    }
}

void atrace_begin_body(const char* name)
{
    WRITE_MSG("B|%d|", "%s", name, "");
}
//"B|pid|函数名"写入节点
#define WRITE_MSG(format_begin, format_end, name, value) { \
    char buf[ATRACE_MESSAGE_LENGTH]; \
    int pid = getpid(); \
    int len = snprintf(buf, sizeof(buf), format_begin "%s" format_end, pid, \
        name, value); \
    if (len >= (int) sizeof(buf)) { \
        /* Given the sizeof(buf), and all of the current format buffers, \
         * it is impossible for name_len to be < 0 if len >= sizeof(buf). */ \
        int name_len = strlen(name) - (len - sizeof(buf)) - 1; \
        /* Truncate the name to make the message fit. */ \
        ALOGW("Truncated name in %s: %s\n", __FUNCTION__, name); \
        len = snprintf(buf, sizeof(buf), format_begin "%.*s" format_end, pid, \
            name_len, name, value); \
    } \
    write(atrace_marker_fd, buf, len); \
}

onMessageReceived()退出时，函数体内定义的局部变量会自动析构，也包括 ATRACE_CALL()定义的EglScopedTrace类型局部变量。EglScopedTrace类型的析构函数调用atrace_end()，atrace_end()-->atrace_begin_body()-->WRITE_MSG()，将字符串"E|pid"写入内核ring buffer。字符串写入ring buffer时会自动记录时间戳，这样可视化工具在trace日志中找到同pid且相邻"B|pid|函数名"、"E|pid"，它们之间的时间差就是函数运行时长。

static inline void atrace_end(uint64_t tag)
{
    if (CC_UNLIKELY(atrace_is_tag_enabled(tag))) {
        void atrace_end_body();
        atrace_end_body();
    }
}

void atrace_end_body()
{
    WRITE_MSG("E|%d", "%s", "", "");
}

3. 进程信息

3.1 binder通信对端进程

在perfetto中点击binder transaction或者binder reply，会有一条连接到binder通信对端的连线。binder通信对端进程数据，是桩函数trace_binder_transaction、trace_binder_transaction_received提供的。在发送端，桩函数trace_binder_transaction记录binder_transaction的debug_id、对端线程tid等信息，对端收到后在trace_binder_transaction_received记录收到binder_transaction的debug_id，写ring buffer会自动记录本线程的tid，这样可视化工具就能通过debug_id和tid将两端联系起来。

TRACE_EVENT(binder_transaction,
	TP_PROTO(bool reply, struct binder_transaction *t,
		 struct binder_node *target_node),
	TP_ARGS(reply, t, target_node),
    ......................................................................
	TP_fast_assign(
		__entry->debug_id = t->debug_id;
		__entry->target_node = target_node ? target_node->debug_id : 0;
		__entry->to_proc = t->to_proc->pid;
		__entry->to_thread = t->to_thread ? t->to_thread->pid : 0;
		__entry->reply = reply;
		__entry->code = t->code;
		__entry->flags = t->flags;
	),
    //发送端将打印binder_transaction的debug_id、对端线程的tid
	TP_printk("transaction=%d dest_node=%d dest_proc=%d dest_thread=%d reply=%d flags=0x%x code=0x%x",
		  __entry->debug_id, __entry->target_node,
		  __entry->to_proc, __entry->to_thread,
		  __entry->reply, __entry->flags, __entry->code)
);

TRACE_EVENT(binder_transaction_received,
	TP_PROTO(struct binder_transaction *t),
	TP_ARGS(t),

	TP_STRUCT__entry(
		__field(int, debug_id)
	),
	TP_fast_assign(
		__entry->debug_id = t->debug_id;
	),
    //接收端打印binder_transaction的debug_id
	TP_printk("transaction=%d", __entry->debug_id)
);

3.2进程状态、优先级

perfetto能显示进程的优先级，也能通过不同的颜色标明进程状态，进程优先级和进程状态是桩函数trace_sched_switch提供的。trace_sched_switch会记录切换前后两个进程的优先级、状态等信息。

TRACE_EVENT(sched_switch,

	TP_PROTO(bool preempt,
		 struct task_struct *prev,
		 struct task_struct *next),
         ............................................................................
	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
		__entry->prev_state & (TASK_STATE_MAX-1) ?
		  __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
				{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
				{ 128, "K" }, { 256, "W" }, { 512, "P" },
				{ 1024, "N" }) : "R",
		__entry->prev_state & TASK_STATE_MAX ? "+" : "",
		__entry->next_comm, __entry->next_pid, __entry->next_prio)
);

3.3进程阻塞原因

如果进程处于不可中断睡眠状态，perfetto会显示是否是IO导致的不可中断睡眠状态。提供阻塞原因的桩函数是trace_sched_blocked_reason，其记录进程是否处于io_wait，以及调用函数的名称。

TRACE_EVENT(sched_blocked_reason,

	TP_PROTO(struct task_struct *tsk),

	TP_ARGS(tsk),

	TP_STRUCT__entry(
		__field( pid_t,	pid	)
		__field( void*, caller	)
		__field( bool, io_wait	)
	),

	TP_fast_assign(
		__entry->pid	= tsk->pid;
		__entry->caller = (void*)get_wchan(tsk);
		__entry->io_wait = tsk->in_iowait;
	),

	TP_printk("pid=%d iowait=%d caller=%pS", __entry->pid, __entry->io_wait, __entry->caller)
);

3.4进程唤醒源

perfetto能够显示进程是内谁唤醒的，点击刚从sleep状态切换到runable状态的进程，能够显示唤醒进程的pid，这些信息是由桩函数trace_sched_wakeup提供的，其记录唤醒源进程的pid、comm、优先级等信息。

DECLARE_EVENT_CLASS(sched_wakeup_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(__perf_task(p)),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
		__field(	int,	success			)
		__field(	int,	target_cpu		)
	),
    ......................................................
	TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d",
		  __entry->comm, __entry->pid, __entry->prio,
		  __entry->target_cpu)
);

wellt606

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
Android性能分析工具perfetto (2)pefetto原理简析

函数运行时长是怎么来的呢？在发送端，桩函数trace_binder_transaction记录binder_transaction的debug_id、对端线程tid等信息，对端收到后在trace_binder_transaction_received记录收到binder_transaction的debug_id，写ring buffer会自动记录本线程的tid，这样可视化工具就能通过debug_id和tid将两端联系起来。trace_sched_switch会记录切换前后两个进程的优先级、状态等信息。
复制链接

扫一扫