ftrace_init

MIPS在使用ftrace功能之前,需要添加-pg编译选项。该选项会让编译器在每个函数的入口处插入一次对_mcount()函数的调用,_mcount函数的具体实现由各个架构自行决定。随后,在链接过程中,这些调用被解析到架构提供的_mcount实现上。

MIPS中,_mcount函数的定义如下:

#ifdef CONFIG_DYNAMIC_FTRACE

/*
 * ftrace_caller / _mcount: entry point used by dynamic ftrace on MIPS.
 *
 * The _mcount label is placed directly inside ftrace_caller, so both symbols
 * refer to the same address.
 */
NESTED(ftrace_caller, PT_SIZE, ra)
	.globl _mcount
_mcount:
EXPORT_SYMBOL(_mcount)
	/* Initial first instruction: branch straight to the return stub. */
	b	ftrace_stub
	/*
	 * Branch delay slot.
	 * NOTE(review): on 32-bit this presumably undoes a stack adjustment
	 * made by the caller's call sequence -- confirm against the o32 ABI.
	 */
#ifdef CONFIG_32BIT
	 addiu sp,sp,8
#else
	 nop
#endif

	/*
	 * ftrace_caller + 8: the "jal" encoding prepared by
	 * ftrace_dyn_arch_init_insns() targets this point, skipping the two
	 * instructions above.
	 */
	MCOUNT_SAVE_REGS
#ifdef KBUILD_MCOUNT_RA_ADDRESS
	PTR_S	MCOUNT_RA_ADDRESS_REG, PT_R12(sp)
#endif

	/* a0 = ra - 8 */
	PTR_SUBU a0, ra, 8
	PTR_LA   t1, _stext
	/* t2 = (a0 < _stext) */
	sltu     t2, a0, t1
	PTR_LA   t1, _etext
	/* t3 = (a0 > _etext) */
	sltu     t3, t1, a0
	/* t1 != 0 when a0 lies outside [_stext, _etext] */
	or       t1, t2, t3
	/* Inside the range: a0 is used as-is, skip the adjustment below. */
	beqz     t1, ftrace_call
	 nop
	/*
	 * Outside the range: move a0 back by a further 16 or 12 bytes.
	 * NOTE(review): presumably module call sites, whose call sequence is
	 * longer than the in-kernel one -- confirm.
	 */
#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT)
	PTR_SUBU a0, a0, 16
#else
	PTR_SUBU a0, a0, 12
#endif

	.globl ftrace_call
ftrace_call:
	/*
	 * Placeholder instruction.
	 * NOTE(review): presumably patched at run time with a call to the
	 * active tracing function -- confirm.
	 */
	nop
	/* a1 = AT */
	 move	a1, AT

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	.globl ftrace_graph_call
ftrace_graph_call:
	/* Second placeholder pair, only present with the graph tracer. */
	nop
	 nop
#endif

	MCOUNT_RESTORE_REGS
	.globl ftrace_stub
ftrace_stub:
	RETURN_BACK
	END(ftrace_caller)

可以看到,_mcount的定义与ftrace_caller的定义位于同一入口。因此,内核编译完成后,反汇编代码会出现:

ffffffff802019b0 <get_system_type>:
get_system_type():
ffffffff802019b0:       03e0082d        move    at,ra
ffffffff802019b4:       0c088904        jal     ffffffff80222410 <ftrace_caller>

通过System.map可发现_mcount与ftrace_caller的地址一致。

内核在启动过程中会对ftrace进行初始化,调用路径为:start_kernel()(init/main.c)—> ftrace_init()。

/*
 * ftrace_init - boot-time setup of dynamic ftrace.
 *
 * Runs the architecture hook, counts the call-site entries recorded between
 * __start_mcount_loc and __stop_mcount_loc, hands them to
 * ftrace_process_locs() and finally applies any early filters.
 * If the architecture hook fails or the table is empty, ftrace_disabled
 * is set to 1.
 */
void __init ftrace_init(void)
{
	/* Bounds of the call-site table; filled in during the kernel build. */
	extern unsigned long __start_mcount_loc[];
	extern unsigned long __stop_mcount_loc[];
	unsigned long nr_entries;
	unsigned long irq_flags;
	int err;

	/*
	 * The architecture hook prepares the replacement instruction
	 * encodings and rewrites the first instruction of _mcount to a nop.
	 * It runs with local interrupts disabled.
	 */
	local_irq_save(irq_flags);
	err = ftrace_dyn_arch_init();
	local_irq_restore(irq_flags);
	if (err)
		goto failed;

	/* Number of recorded call sites. */
	nr_entries = __stop_mcount_loc - __start_mcount_loc;
	if (!nr_entries) {
		pr_info("ftrace: No functions to be traced?\n");
		goto failed;
	}

	pr_info("ftrace: allocating %ld entries in %ld pages\n",
		nr_entries, nr_entries / ENTRIES_PER_PAGE + 1);

	ftrace_enabled = 1;
	last_ftrace_enabled = ftrace_enabled;

	/*
	 * Record every call site in ftrace_page / dyn_ftrace structures.
	 * NOTE(review): the result is stored but not checked here.
	 */
	err = ftrace_process_locs(NULL, __start_mcount_loc, __stop_mcount_loc);

	set_ftrace_early_filters();
	return;

failed:
	ftrace_disabled = 1;
}

/*
 * ftrace_dyn_arch_init - architecture part of the dynamic ftrace setup.
 *
 * Always returns 0.
 */
int __init ftrace_dyn_arch_init(void)
{
	/* Build the two encodings: "la v1, _mcount" and "jal ftrace_caller+8". */
	ftrace_dyn_arch_init_insns();

	/*
	 * MCOUNT_ADDR is ((unsigned long)(_mcount)), the entry address of
	 * _mcount, and INSN_NOP is 0x00000000. This replaces the entry
	 * instruction "b ftrace_stub" with a nop.
	 * NOTE(review): the return value of ftrace_modify_code() is ignored.
	 */
	ftrace_modify_code(MCOUNT_ADDR, INSN_NOP);

	return 0;
}

// File-scope buffers holding the instruction encodings built by
// ftrace_dyn_arch_init_insns(): one word for "jal ftrace_caller+8" and
// two words for "la v1, _mcount".
static unsigned int insn_jal_ftrace_caller __read_mostly;
static unsigned int insn_la_mcount[2] __read_mostly;

static inline void ftrace_dyn_arch_init_insns(void)
{
	u32 *buf;
	unsigned int v1;

	v1 = 3;
	buf = (u32 *)&insn_la_mcount[0];
	UASM_i_LA(&buf, v1, MCOUNT_ADDR);
	//buf存放指令码:la v1, _mcount

	buf = (u32 *)&insn_jal_ftrace_caller;
	uasm_i_jal(&buf, (FTRAE_ADDR + 8) & JUMP_RANGE_MASK);
	//#define FTRACE_ADDR ((unsigned long)ftrace_caller);FTRACE_ADDR为ftrace_caller()函数的入口地址
	//buf中存放指令码:jal ftrace_caller+8
}

/*
//addr为_mcount()函数的入口地址,rs为寄存器编号
void UASM_i_LA(u32 **buf, unsigned int rs, long addr)
{
	UASM_i_LA_mostly(buf, rs, addr);

	if (uasm_rel_lo(addr)) {
		if (!uasm_in_compat_space_p(addr))
			uasm_i_daddiu(buf, rs, rs, uasm_rel_lo(addr));
		else
			uasm_i_addiu(buf, rs, rs, uasm_rel_lo(addr));
	}
}

//
void UASM_i_LA_mostly(u32 **buf, unsigned int rs, long addr)
{
	if (!uasm_in_compat_space_p(addr)) {
	//如果地址为64位,则按照该分支处理

		uasm_i_lui(buf, rs, uasm_rel_highest(addr));
		if (uasm_rel_higher(addr))
			uasm_i_daddiu(buf, rs, rs, uasm_rel_higher(addr));
		if (uasm_rel_hi(addr)) {
			uasm_i_dsll(buf, rs, rs, 16);
			uasm_i_daddiu(buf, rs, rs, uasm_rel_hi(addr));
			uasm_i_dsll(buf, rs, rs, 16);
		} else
			uasm_i_dsll32(buf, rs, rs, 0);
	} else
	//如果地址为32位,则按照该分支处理

		uasm_i_lui(buf, rs, uasm_rel_hi(addr));
}

int uasm_in_compat_space_p(long addr)
{
	//检测地址是否为32位地址,是返回1,否返回0
	return addr == (int)addr;
}

//关于上述接口中所调用的uasm_i_xxx()接口的声明如下:
#define Ip_u1s2(op) void uasm_i##op(u32 **buf, unsigned int a, unsigned int b)

#define I_u1s2(op)	Ip_u1s2(op) { build_insn(buf, insn##op, a, b); } \
					UASM_EXPORT_SYMBOL(uasm_i##op);

#define UASM_EXPORT_SYMBOL EXPORT_SYMBOL

//因此,uasm_i_lui接口的实际原型为:
void uasm_i_lui(u32 **buf, unsigned int a, unsigned int b)
{
	build_insn(buf, insn_lui, a, b);
}
EXPORT_SYMBOL(uasm_i_lui);

//宏定义表示为:
I_u1s2(_lui);

//由上可知,实际执行函数为build_insn()接口,该接口操作前,需实例化数组对象insn_table,如下:
//枚举类型,即为每条操作码设置一个索引号
enum opcode {
	...,
	insn_lui,
	...
};

struct insn {
	u32 match;
	enum fields fields;
};
//insn_table数组对象
static const struct insn insn_table[insn_invalid] = {
	...,
	[insn_lui] = {M(lui_op, 0, 0, 0, 0, 0), RT | SIMM},
	...
};

#define M(a, b ,c, d, e, f) ((a) << OP_SH | (b) << RS_SH | (c) << RT_SH | (d) << RD_SH | (e) << RE_SH | (f) << FUNC_SH)

//假设当前指令操作码为lui,则该指令为:
struct insn insn_table[insn_lui];
insn_table[insn_lui].match = 15 << 26;
//match属性用于存放操作码(即lui_op << OP_SH)
insn_table[insn_lui].fields = RT | SIMM;
//fields属性用于标识该指令包含哪些操作数字段(此处为rt寄存器与16位有符号立即数)

//构建指令码,即opc dst/src src/dst
static void build_insn(u32 **buf, enum opcode opc, ...)
{
	const struct insn *ip;
	va_list ap;
	u32 op;

	if (opc < 0 || opc >= insn_invalid || (opc == insn_daddiu && r4k_daddiu_bug()) || (insn_table[opc].match == 0 && insn_table[opc].fields == 0))
		panic("Unsupported Micro-assembler instruction %d", opc);

	ip = &insn_table[opc];
	//获取insn_table数组中opc索引所对应的struct insn结构体对象

	op = ip->match;

	//操作码与dst或src进行组合,拼成指令码
	va_start(ap, opc);
	...
	if (ip->fields & RT)
		op |= build_rt(va_arg(ap, u32));
	...
	if (ip->fields & SIMM)
		op |= build_simm(va_arg(ap, u32));
	...
	va_end(ap);

	**buf = op;
	//赋值操作

	(*buf)++;
}

static inline u32 build_rt(u32 arg)
{
	WARN(arg & ~RT_MASK, KERN_WARNING "Micro-assembler field overflow\n");
	
	return (arg & RT_MASK) << RT_SH;
	//#define RT_MASK 0x1f
	//#define RT_SH 16
}

static inline u32 build_simm(s32 arg)
{
	WARN(arg > 0x7fff || arg < -0x8000, KERN_WARNING "Micro-assembler field overflow\n");
	
	return arg & 0xffff;
}
*/

//综上,可知替换指令已经创建完成
//替换指令
/*
 * ftrace_modify_code - write one instruction word into kernel text.
 * @ip:       address of the instruction to overwrite
 * @new_code: instruction word to store there
 *
 * Returns 0 on success, or -EFAULT if the store faulted.
 */
static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
{
	mm_segment_t saved_fs;
	int store_failed;

	/* Equivalent to *(unsigned int *)ip = new_code, with fault capture. */
	safe_store_code(new_code, ip, store_failed);
	if (unlikely(store_failed))
		return -EFAULT;

	/* Flush the instruction cache for [ip, ip + 8) under the kernel segment. */
	saved_fs = get_fs();
	set_fs(get_ds());
	flush_icache_range(ip, ip + 8);
	set_fs(saved_fs);

	return 0;
}

// safe_store_code(): store the word `src` at address `dst` with the "sw"
// instruction; `error` receives 0 on success and 1 if the store faulted.
#define safe_store_code(src, dst, error) safe_store(STR(sw), src, dst, error)

// safe_store(): generic form; `store` is the store mnemonic as a string.
// NOTE(review): as listed here the macro body carries no line-continuation
// backslashes and has annotations between the asm operands, so it is an
// annotated listing rather than text that compiles as-is.
#define safe_store(store, src, dst, error)
do {
	asm volatile (
		"1: " store " %[tmp_src], 0(%[tmp_dst])\n"
		// Label 1: with store == "sw", writes the register value tmp_src
		// to the memory at address tmp_dst, i.e. *(tmp_dst) = tmp_src.

		"	li %[tmp_err], 0\n"
		// Loads 0 into the tmp_err register (the no-fault result).
		
		"2: .insn\n"

		".section .fixup, \"ax\"\n"
		"3: li %[tmp_err], 1\n"
		"	j 2b\n"
		".previous\n"
		// Fixup code at label 3: sets tmp_err to 1 and jumps back to label 2.

		".section\t__ex_table,\"a\"\n\t"
		STR(PTR) "\t1b, 3b\n\t"
		// Exception-table entry pairing label 1 with label 3 (PTR is .word).
		// Presumably a fault at label 1 resumes at label 3 -- standard
		// kernel fixup convention, confirm.

		".previous\n"

		: [tmp_err] "=r" (error)
		// Output operand: error, in a register.

		: [tmp_dst] "r" (dst), [tmp_src] "r" (src)
		// Input operands: dst and src, both in registers.
		
		: "memory"
		// Clobber: tells the compiler that the asm modifies memory.
	);
} while (0)
// Note: src is the instruction word itself (e.g. INSN_NOP) held in a
// register, and dst is the address being written; src is not an address.

/*
 * Loongson-3 implementation behind flush_icache_range().
 *
 * Issues a single "synci 0($0)" instruction; the start and end arguments
 * are not used by this body.
 */
static void local_loongson3_flush_icache_range(unsigned long start, unsigned long end)
{
	asm volatile (
		"\tsynci 0($0)\n"
		:
		:
	);
}

/*
 * ftrace_process_locs - record a table of call-site addresses.
 * @mod:   owning module, or NULL for the core kernel
 * @start: first entry of the address table
 * @end:   one past the last entry of the address table
 *
 * Sorts the table, allocates ftrace_page structures for it, links them onto
 * the global page list, copies every non-zero adjusted address into a
 * dyn_ftrace record and then calls ftrace_update_code() on the new pages.
 *
 * Returns 0 on success or when the table is empty, and -ENOMEM when page
 * allocation fails or when @mod is set but ftrace_pages is NULL.
 */
static int ftrace_process_locs(struct module *mod, unsigned long *start, unsigned long *end)
{
	struct ftrace_page *start_pg;
	struct ftrace_page *pg;
	struct dyn_ftrace *rec;
	unsigned long count;
	unsigned long *p;
	unsigned long addr;
	unsigned long flags = 0;
	int ret = -ENOMEM;

	count = end - start;

	if (!count)
		return 0;

	/* Sort the recorded call-site addresses in place. */
	sort(start, count, sizeof(*start), ftrace_cmp_ips, NULL);

	/*
	 * Allocate the ftrace_page chain that will hold `count` records.
	 * The resulting layout may look like this:
	 *
	 *   ftrace_page            ftrace_page
	 *   +---------+            +---------+
	 *   | next    |----------->| next    |---> ...
	 *   | records |--+         | records |--+
	 *   +---------+  |         +---------+  |
	 *                v                      v
	 *     [dyn_ftrace0 | dyn_ftrace1 | ...]  [...]
	 *
	 * Each dyn_ftrace records one location where _mcount is called.
	 */
	start_pg = ftrace_allocate_pages(count);

	if (!start_pg)
		return -ENOMEM;

	mutex_lock(&ftrace_lock);

	if (!mod) {
		/* Core kernel: the new chain becomes the global list. */
		WARN_ON(ftrace_pages || ftrace_pages_start);
		ftrace_pages = ftrace_pages_start = start_pg;
	} else {
		/* Module: the global list must already exist; ret stays -ENOMEM. */
		if (!ftrace_pages)
			goto out;

		/* ftrace_pages is expected to be the tail; walk to it if not. */
		if (WARN_ON(ftrace_pages->next)) {
			while (ftrace_pages->next)
				ftrace_pages = ftrace_pages->next;
		}

		/* Append the new chain to the singly linked list. */
		ftrace_pages->next = start_pg;
	}

	p = start;

	pg = start_pg;
	while (p < end) {
		addr = ftrace_call_adjust(*p++);

		/* Skip entries whose adjusted address is zero. */
		if (!addr)
			continue;

		if (pg->index == pg->size) {
			/* Current page is full: continue in the next ftrace_page. */

			if (WARN_ON(!pg->next))
				break;
			pg = pg->next;
		}

		/* Fill the dyn_ftrace slots of the page in order, from index 0. */
		rec = &pg->records[pg->index++];

		/* Remember the location where _mcount is called. */
		rec->ip = addr;
	}

	/* All allocated pages are expected to have been reached. */
	WARN_ON(pg->next);

	/* The last page written becomes the new tail of the global list. */
	ftrace_pages = pg;

	/* For the core kernel only, patch with local interrupts disabled. */
	if (!mod)
		local_irq_save(flags);
	/* Replace the instruction at every recorded call site. */
	ftrace_update_code(mod, start_pg);

	if (!mod)
		local_irq_restore(flags);
	ret = 0;
out:
	mutex_unlock(&ftrace_lock);

	return ret;
}

/*
 * ftrace_allocate_pages - allocate a chain of ftrace_page structures.
 * @num_to_init: number of dyn_ftrace records the chain must hold
 *
 * Returns the head of a singly linked chain whose record areas together
 * cover @num_to_init entries, or NULL if @num_to_init is zero or any
 * allocation fails (everything allocated so far is released).
 */
static struct ftrace_page *ftrace_allocate_pages(unsigned long num_to_init)
{
	struct ftrace_page *start_pg;
	struct ftrace_page *pg;
	int order;
	int cnt;

	if (!num_to_init)
		return NULL;

	start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL);
	if (!pg)
		return NULL;

	for (;;) {
		/*
		 * Allocate the record area of this page from contiguous free
		 * pages; cnt is the number of records it covers and never
		 * exceeds num_to_init.
		 */
		cnt = ftrace_allocate_records(pg, num_to_init);
		if (cnt < 0)
			goto free_pages;

		/* Stop once every requested record has a slot. */
		num_to_init -= cnt;
		if (!num_to_init)
			break;

		/* Otherwise chain another ftrace_page for the remainder. */
		pg->next = kzalloc(sizeof(*pg), GFP_KERNEL);
		if (!pg->next)
			goto free_pages;

		pg = pg->next;
	}

	return start_pg;

free_pages:
	/* Release every record area and descriptor allocated so far. */
	pg = start_pg;
	while (pg) {
		order = get_count_order(pg->size / ENTRIES_PER_PAGE);
		free_pages((unsigned long)pg->records, order);
		start_pg = pg->next;
		kfree(pg);
		pg = start_pg;
	}
	pr_info("ftrace: FAILED to allocate memory for functions\n");
	return NULL;
}

/*
 * ftrace_allocate_records - allocate the record area of one ftrace_page.
 * @pg:    page descriptor whose records/size fields are filled in
 * @count: number of records still needed
 *
 * Returns the number of records this page accounts for (at most @count),
 * -EINVAL if @count is zero, or -ENOMEM if not even an order-0 allocation
 * succeeds.
 */
static int ftrace_allocate_records(struct ftrace_page *pg, int count)
{
	int order;
	int capacity;

	if (WARN_ON(!count))
		return -EINVAL;

	/* Start from the order that covers the required number of pages. */
	order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE));

	/* Shrink while the area would hold count + ENTRIES_PER_PAGE or more. */
	while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE)
		order--;

	/* Try to allocate zeroed pages, retrying with a smaller order. */
	for (;;) {
		pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
		if (pg->records)
			break;
		if (!order)
			return -ENOMEM;
		order >>= 1;
	}

	/* pg->size is the full capacity of the allocated area. */
	capacity = (PAGE_SIZE << order) / ENTRY_SIZE;
	pg->size = capacity;

	return capacity > count ? count : capacity;
}

/*
 * ftrace_call_adjust - map a recorded call-site address to the address that
 * gets stored in the dyn_ftrace record.
 *
 * In this implementation no adjustment is applied: @addr is returned as-is.
 */
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
	return addr;
}

/*
 * ftrace_update_code - initialise the records of newly added pages.
 * @mod:     owning module, or NULL for the core kernel
 * @new_pgs: head of the chain of pages to process
 *
 * Sets the flags of every filled dyn_ftrace record and, unless
 * CC_USING_NOP_MCOUNT is defined, converts its call site to a nop through
 * ftrace_code_disable(). Also updates the ftrace_update_time and
 * ftrace_update_tot_cnt statistics.
 *
 * Returns 0, or -1 if ftrace_disabled is found set while iterating.
 */
static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
{
	struct ftrace_page *pg;
	struct dyn_ftrace *p;
	u64 start, stop;
	unsigned long update_cnt = 0;
	unsigned long rec_flags = 0;
	int i;

	start = ftrace_now(raw_smp_processor_id());

	/* Records that belong to a module start out flagged as disabled. */
	if (mod)
		rec_flags |= FTRACE_FL_DISABLED;

	/* Walk every ftrace_page in the chain. */
	for (pg = new_pgs; pg; pg = pg->next) {

		/* pg->index is the number of records filled in this page. */
		for (i = 0; i < pg->index; i++) {

			if (unlikely(ftrace_disabled))
				return -1;

			p = &pg->records[i];
			p->flags = rec_flags;

			/* Turn the instruction that calls _mcount into a nop. */
			if (!__is_defined(CC_USING_NOP_MCOUNT) && !ftrace_code_disable(mod, p))
				break;

			update_cnt++;
		}
	}

	stop = ftrace_now(raw_smp_processor_id());
	ftrace_update_time = stop - start;
	ftrace_update_tot_cnt += update_cnt;

	return 0;
}

/*
 * ftrace_code_disable - turn one recorded call site into a nop.
 * @mod: owning module, or NULL for the core kernel
 * @rec: record describing the call site
 *
 * Returns 1 on success. Returns 0 if ftrace is disabled or if
 * ftrace_make_nop() fails, in which case the failure is reported through
 * ftrace_bug() with ftrace_bug_type set to FTRACE_BUG_INIT.
 */
static int ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
{
	int err;

	if (unlikely(ftrace_disabled))
		return 0;

	/* Replace the instruction that calls _mcount with a nop. */
	err = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
	if (!err)
		return 1;

	ftrace_bug_type = FTRACE_BUG_INIT;
	ftrace_bug(err, rec);
	return 0;
}

/*
 * ftrace_make_nop - overwrite the call at rec->ip with a non-calling
 * instruction.
 * @mod:  owning module (not used by this body)
 * @rec:  record holding the call-site address
 * @addr: address the call site currently targets (not used by this body)
 *
 * Returns the result of the underlying code-modification helper.
 */
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	unsigned int insn;

	/*
	 * Call sites for which core_kernel_text() is true get INSN_NOP;
	 * all others (modules) get INSN_B_1F.
	 */
	if (core_kernel_text(ip))
		insn = INSN_NOP;
	else
		insn = INSN_B_1F;

#ifdef CONFIG_64BIT
	return ftrace_modify_code(ip, insn);
#else
	return ftrace_modify_code_2(ip, insn, INSN_NOP);
#endif
}

/*
 * core_kernel_text - test whether an address lies in core kernel text.
 * @addr: address to classify
 *
 * Returns 1 if @addr is inside [_stext, _etext), or if the system has not
 * yet reached SYSTEM_RUNNING and @addr is inside the init text section;
 * returns 0 otherwise.
 */
int notrace core_kernel_text(unsigned long addr)
{
	if (addr >= (unsigned long)_stext && addr < (unsigned long)_etext)
		return 1;

	/* Init text only counts while the system is still booting. */
	if (system_state < SYSTEM_RUNNING && init_kernel_text(addr))
		return 1;
	return 0;
}

/*
 * init_kernel_text - test whether an address lies in the init text section.
 * @addr: address to classify
 *
 * Returns 1 if @addr is inside [_sinittext, _einittext), 0 otherwise.
 */
int init_kernel_text(unsigned long addr)
{
        const unsigned long init_begin = (unsigned long)_sinittext;
        const unsigned long init_end = (unsigned long)_einittext;

        return (addr >= init_begin && addr < init_end) ? 1 : 0;
}


//利用ftrace设置启动阶段时的过滤点
/*
static void __init set_ftrace_early_filters(void)
{
	if (ftrace_filter_buf[0])
		ftrace_set_early_filter(&global_ops, ftrace_filter_buf, 1);
	...
}

static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;

static int __init set_ftrace_filter(char *str)
{
	ftrace_filter_param = true;
	strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
	return 1;
}
__setup("ftrace_filter=", set_ftrace_filter);
*/
//该类过滤点需要在启动命令行中设置,否则为空
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值