MIPS在使用ftrace功能之前,需要在编译内核时添加-pg编译选项。该选项会让编译器在每个函数的入口处插入一条对_mcount()函数的调用(见下文反汇编中的jal指令),而_mcount函数的具体实现由各个架构自行提供。随后,在链接过程中,这些调用被解析到_mcount函数的实际地址。
MIPS中,_mcount函数的定义如下:
/*
 * Dynamic-ftrace entry stub. _mcount and ftrace_caller label the same
 * address, so a call to either one lands here.
 */
#ifdef CONFIG_DYNAMIC_FTRACE
NESTED(ftrace_caller, PT_SIZE, ra)
.globl _mcount
_mcount:
EXPORT_SYMBOL(_mcount)
/*
 * As built, the first instruction branches straight to ftrace_stub.
 * ftrace_dyn_arch_init() overwrites this instruction with a nop at boot,
 * after which execution continues into the code below.
 */
b ftrace_stub
/* Instruction right after the branch: sp += 8 on 32-bit kernels, nop otherwise. */
#ifdef CONFIG_32BIT
addiu sp,sp,8
#else
nop
#endif
/* Macro defined elsewhere; presumably saves the registers used below. */
MCOUNT_SAVE_REGS
#ifdef KBUILD_MCOUNT_RA_ADDRESS
/* Store MCOUNT_RA_ADDRESS_REG into the PT_R12 slot of the frame at sp. */
PTR_S MCOUNT_RA_ADDRESS_REG, PT_R12(sp)
#endif
/* a0 = ra - 8; presumably the address of the call instruction at the call site. */
PTR_SUBU a0, ra, 8
/*
 * Range check against kernel text:
 *   t2 = (a0 < _stext), t3 = (_etext < a0), t1 = t2 | t3.
 * t1 == 0 means a0 lies within [_stext, _etext]; branch to ftrace_call.
 */
PTR_LA t1, _stext
sltu t2, a0, t1
PTR_LA t1, _etext
sltu t3, t1, a0
or t1, t2, t3
beqz t1, ftrace_call
nop
/*
 * a0 is outside kernel text: move it back a further 16 bytes (32-bit with
 * KBUILD_MCOUNT_RA_ADDRESS) or 12 bytes. NOTE(review): presumably this
 * accounts for a longer call sequence used outside the core kernel,
 * e.g. in modules; confirm.
 */
#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT)
PTR_SUBU a0, a0, 16
#else
PTR_SUBU a0, a0, 12
#endif
.globl ftrace_call
ftrace_call:
/* Placeholder; presumably patched at run time with a call to the tracer (confirm). */
nop
/* a1 = AT. Call sites execute "move at,ra" before the jal, so AT holds the caller's ra. */
move a1, AT
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
/* Two placeholder instructions; presumably patched for the function-graph tracer (confirm). */
nop
nop
#endif
/* Macro defined elsewhere; presumably the counterpart of MCOUNT_SAVE_REGS. */
MCOUNT_RESTORE_REGS
.globl ftrace_stub
ftrace_stub:
/* Macro defined elsewhere; presumably returns to the instrumented function. */
RETURN_BACK
END(ftrace_caller)
可以看到,_mcount的定义与ftrace_caller的定义位于同一入口。因此,内核编译完成后,反汇编代码会出现:
ffffffff802019b0 <get_system_type>:
get_system_type():
ffffffff802019b0: 03e0082d move at,ra
ffffffff802019b4: 0c088904 jal ffffffff80222410 <ftrace_caller>
通过System.map可发现_mcount与ftrace_caller的地址一致。
内核在启动过程中会对ftrace进行初始化,调用路径为:start_kernel() -> ftrace_init(),其中start_kernel()位于init/main.c。
/*
 * ftrace_init - boot-time initialisation of dynamic ftrace.
 *
 * Runs the architecture hook, counts the call sites recorded between
 * __start_mcount_loc and __stop_mcount_loc and passes them to
 * ftrace_process_locs(). If the hook fails or no call site is recorded,
 * ftrace_disabled is set.
 */
void __init ftrace_init(void)
{
	/* Bounds of the call-site table; the values are filled in while the kernel is built. */
	extern unsigned long __start_mcount_loc[];
	extern unsigned long __stop_mcount_loc[];
	unsigned long nr_sites, irq_flags;
	int err;

	/*
	 * Arch hook, run with local interrupts off: builds the replacement
	 * instruction words and turns the first instruction of _mcount()
	 * into a nop.
	 */
	local_irq_save(irq_flags);
	err = ftrace_dyn_arch_init();
	local_irq_restore(irq_flags);
	if (err)
		goto failed;

	/* Number of recorded "jal ftrace_caller" call sites in the kernel. */
	nr_sites = __stop_mcount_loc - __start_mcount_loc;
	if (nr_sites == 0) {
		pr_info("ftrace: No functions to be traced?\n");
		goto failed;
	}

	pr_info("ftrace: allocating %ld entries in %ld pages\n", nr_sites, nr_sites / ENTRIES_PER_PAGE + 1);

	last_ftrace_enabled = ftrace_enabled = 1;

	/*
	 * Record every call site in ftrace_page / dyn_ftrace structures.
	 * The result is stored but not acted upon here.
	 */
	err = ftrace_process_locs(NULL, __start_mcount_loc, __stop_mcount_loc);

	set_ftrace_early_filters();
	return;

failed:
	ftrace_disabled = 1;
}
//ftrace_dyn_arch_init - architecture setup for dynamic ftrace; always returns 0
int __init ftrace_dyn_arch_init(void)
{
ftrace_dyn_arch_init_insns();
//Builds two instruction sequences: "la v1, _mcount" and "jal ftrace_caller+8"
ftrace_modify_code(MCOUNT_ADDR, INSN_NOP);
//#define MCOUNT_ADDR ((unsigned long)(_mcount)); MCOUNT_ADDR is the entry address of _mcount()
//#define INSN_NOP 0x00000000
//Replaces the "b ftrace_stub" instruction at the entry of _mcount() with a nop
//NOTE(review): the return value of ftrace_modify_code() is not checked here
return 0;
}
//全局变量
static unsigned int insn_jal_ftrace_caller __read_mostly;
static unsigned int insn_la_mcount[2] __read_mostly;
static inline void ftrace_dyn_arch_init_insns(void)
{
u32 *buf;
unsigned int v1;
v1 = 3;
buf = (u32 *)&insn_la_mcount[0];
UASM_i_LA(&buf, v1, MCOUNT_ADDR);
//buf存放指令码:la v1, _mcount
buf = (u32 *)&insn_jal_ftrace_caller;
uasm_i_jal(&buf, (FTRAE_ADDR + 8) & JUMP_RANGE_MASK);
//#define FTRACE_ADDR ((unsigned long)ftrace_caller);FTRACE_ADDR为ftrace_caller()函数的入口地址
//buf中存放指令码:jal ftrace_caller+8
}
/*
// UASM_i_LA - emit the instructions that load addr into register rs.
// buf:  cursor into the output instruction buffer
// rs:   register number
// addr: address to load (in the ftrace case, the entry address of _mcount())
void UASM_i_LA(u32 **buf, unsigned int rs, long addr)
{
	// Everything except the low part of the address.
	UASM_i_LA_mostly(buf, rs, addr);

	// Nothing left to add when the low part is zero.
	if (!uasm_rel_lo(addr))
		return;

	// Compat-space addresses use addiu, all others daddiu.
	if (uasm_in_compat_space_p(addr))
		uasm_i_addiu(buf, rs, rs, uasm_rel_lo(addr));
	else
		uasm_i_daddiu(buf, rs, rs, uasm_rel_lo(addr));
}
// UASM_i_LA_mostly - emit the load-address sequence without the final low-part add.
void UASM_i_LA_mostly(u32 **buf, unsigned int rs, long addr)
{
	// Address is in the 32-bit compat space: one lui of the hi part is enough.
	if (uasm_in_compat_space_p(addr)) {
		uasm_i_lui(buf, rs, uasm_rel_hi(addr));
		return;
	}

	// 64-bit address: start from the highest part, add the higher part if non-zero.
	uasm_i_lui(buf, rs, uasm_rel_highest(addr));
	if (uasm_rel_higher(addr))
		uasm_i_daddiu(buf, rs, rs, uasm_rel_higher(addr));

	// Hi part is zero: emit a dsll32 instead of the shift/add/shift sequence.
	if (!uasm_rel_hi(addr)) {
		uasm_i_dsll32(buf, rs, rs, 0);
		return;
	}

	// Shift by 16, add the hi part, shift by 16 again.
	uasm_i_dsll(buf, rs, rs, 16);
	uasm_i_daddiu(buf, rs, rs, uasm_rel_hi(addr));
	uasm_i_dsll(buf, rs, rs, 16);
}
// uasm_in_compat_space_p - test whether addr is a 32-bit (compat space) address.
// Returns 1 when addr is unchanged by a round trip through int, 0 otherwise.
// (Name corrected to match the callers in UASM_i_LA and UASM_i_LA_mostly.)
int uasm_in_compat_space_p(long addr)
{
	return addr == (int)addr;
}
//The uasm_i_xxx() helpers called above are declared and defined through these macros:
#define Ip_u1s2(op) void uasm_i##op(u32 **buf, unsigned int a, unsigned int b)
#define I_u1s2(op) Ip_u1s2(op) { build_insn(buf, insn##op, a, b); } \
UASM_EXPORT_SYMBOL(uasm_i##op);
#define UASM_EXPORT_SYMBOL EXPORT_SYMBOL
//So the actual definition of uasm_i_lui is:
void uasm_i_lui(u32 **buf, unsigned int a, unsigned int b)
{
build_insn(buf, insn_lui, a, b);
}
EXPORT_SYMBOL(uasm_i_lui);
//which is written with the macro as:
I_u1s2(_lui);
//As shown above, the real work is done by build_insn(). It relies on the insn_table array, set up as follows.
//Enumeration that gives every instruction an index number:
enum opcode {
...,
insn_lui,
...
};
//One descriptor per instruction:
struct insn {
//opcode bits of the instruction word
u32 match;
//flags naming the operand fields the instruction takes (e.g. RT, SIMM)
enum fields fields;
};
//insn_table: instruction descriptors, indexed by enum opcode
static const struct insn insn_table[insn_invalid] = {
	...,
	//lui: opcode lui_op, operands are an RT register and a signed immediate
	[insn_lui] = {M(lui_op, 0, 0, 0, 0, 0), RT | SIMM},
	...
};
//M() builds an instruction word by shifting each of its six arguments to its field position and OR-ing them together
#define M(a, b ,c, d, e, f) ((a) << OP_SH | (b) << RS_SH | (c) << RT_SH | (d) << RD_SH | (e) << RE_SH | (f) << FUNC_SH)
//Taking lui as the example, its table entry works out as below
//(illustrative pseudo-code, not valid C):
struct insn insn_table[insn_lui];
&insn_table[insn_lui]->match = 15 << 26;
//match holds the opcode
&insn_table[insn_lui]->fields = RT | SIMM;
//fields records the source and destination operands
//build_insn - assemble one instruction word (opc dst/src src/dst) and store it at the buffer cursor
static void build_insn(u32 **buf, enum opcode opc, ...)
{
const struct insn *ip;
va_list ap;
u32 op;
//Panic when opc is out of range, is daddiu while r4k_daddiu_bug() is true, or has an all-zero table entry
if (opc < 0 || opc >= insn_invalid || (opc == insn_daddiu && r4k_daddiu_bug()) || (insn_table[opc].match == 0 && insn_table[opc].fields == 0))
panic("Unsupported Micro-assembler instruction %d", opc);
ip = &insn_table[opc];
//ip points at the struct insn stored at index opc of insn_table
op = ip->match;
//Start from the opcode bits, then OR in each operand field the descriptor lists
va_start(ap, opc);
...
if (ip->fields & RT)
op |= build_rt(va_arg(ap, u32));
...
if (ip->fields & SIMM)
op |= build_simm(va_arg(ap, u32));
...
va_end(ap);
**buf = op;
//Write the finished word at the cursor
(*buf)++;
//The cursor now points at the next free slot
}
// build_rt - place a register number into the RT field of an instruction word.
// Warns when arg has bits set outside RT_MASK; those bits are masked off.
// For reference: #define RT_MASK 0x1f, #define RT_SH 16
static inline u32 build_rt(u32 arg)
{
	WARN(arg & ~RT_MASK, KERN_WARNING "Micro-assembler field overflow\n");

	return (arg & RT_MASK) << RT_SH;
}
// build_simm - encode a signed immediate into the low 16 bits of an instruction word.
// Warns when arg lies outside -0x8000..0x7fff; only the low 16 bits are returned.
static inline u32 build_simm(s32 arg)
{
	WARN(arg < -0x8000 || arg > 0x7fff, KERN_WARNING "Micro-assembler field overflow\n");

	return arg & 0xffff;
}
*/
//综上,可知替换指令已经创建完成
/*
 * ftrace_modify_code - replace one instruction and flush the icache range.
 * @ip:       address of the instruction to overwrite
 * @new_code: instruction word to store there
 *
 * Returns 0 on success, -EFAULT if the store faulted.
 */
static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
{
	mm_segment_t saved_fs;
	int store_failed;

	/* Store new_code at ip; store_failed is set to 1 if the store faults. */
	safe_store_code(new_code, ip, store_failed);
	if (unlikely(store_failed))
		return -EFAULT;

	/* Flush [ip, ip + 8) with the address limit set to get_ds(), then restore it. */
	saved_fs = get_fs();
	set_fs(get_ds());
	flush_icache_range(ip, ip + 8);
	set_fs(saved_fs);

	return 0;
}
/*
 * safe_store - store src at address dst and report whether the store faulted.
 * @store: store mnemonic as a string (safe_store_code passes STR(sw))
 * @src:   value to write, passed in a register
 * @dst:   destination address, passed in a register
 * @error: lvalue that receives 0 on success or 1 on a fault
 *
 * The pair (1b, 3b) is recorded in __ex_table: label 1 is the store itself,
 * label 3 is the .fixup code that sets the error flag to 1 and jumps back
 * to label 2.
 * For reference: #define PTR .word
 *
 * Note: src is the instruction word itself (e.g. the value INSN_NOP), not
 * its address.
 */
#define safe_store_code(src, dst, error) safe_store(STR(sw), src, dst, error)

#define safe_store(store, src, dst, error)				\
do {									\
	asm volatile (							\
		/* *(dst) = src */					\
		"1: " store " %[tmp_src], 0(%[tmp_dst])\n"		\
		/* store completed: error = 0 */			\
		" li %[tmp_err], 0\n"					\
		"2: .insn\n"						\
		".section .fixup, \"ax\"\n"				\
		/* fixup path: error = 1, resume at 2 */		\
		"3: li %[tmp_err], 1\n"					\
		" j 2b\n"						\
		".previous\n"						\
		".section\t__ex_table,\"a\"\n\t"			\
		STR(PTR) "\t1b, 3b\n\t"					\
		".previous\n"						\
		/* output */						\
		: [tmp_err] "=r" (error)				\
		/* inputs */						\
		: [tmp_dst] "r" (dst), [tmp_src] "r" (src)		\
		/* the asm writes memory */				\
		: "memory"						\
	);								\
} while (0)
//flush_icache_range():
//NOTE(review): that flush_icache_range() resolves to this function comes from the original note; confirm for the target platform
static void local_loongson3_flush_icache_range(unsigned long start, unsigned long end)
{
//start and end are not used: one "synci 0($0)" is issued whatever the range
asm volatile ("\tsynci 0($0)\n"::);
//Synchronises the instruction/data caches (presumably so the newly written instruction is seen by instruction fetch; confirm against the CPU manual)
}
/*
 * ftrace_process_locs - record a table of call-site addresses.
 * @mod:   owning module, or NULL for the core kernel
 * @start: first entry of the address table
 * @end:   one past the last entry
 *
 * Sorts the table, allocates ftrace_page structures for it, links them
 * into the global page list and stores each address in a dyn_ftrace
 * record. Finally the call sites are patched via ftrace_update_code().
 *
 * Layout built by ftrace_allocate_pages():
 *
 *   ftrace_page            ftrace_page
 *   +---------+            +---------+
 *   | next    |----------->| next    |---> ...
 *   | records |---+        | records |---+
 *   +---------+   |        +---------+   |
 *                 v                      v
 *   +-------------+-------------+-----+
 *   | dyn_ftrace0 | dyn_ftrace1 | ... |
 *   +-------------+-------------+-----+
 *
 * Each dyn_ftrace records one location where _mcount is called.
 *
 * Returns 0 on success (or when the table is empty), -ENOMEM otherwise.
 */
static int ftrace_process_locs(struct module *mod, unsigned long *start, unsigned long *end)
{
	struct ftrace_page *first_pg;
	struct ftrace_page *cur_pg;
	struct dyn_ftrace *rec;
	unsigned long nr_locs;
	unsigned long *loc;
	unsigned long ip;
	unsigned long irq_flags = 0;
	int ret = -ENOMEM;

	nr_locs = end - start;
	if (nr_locs == 0)
		return 0;

	/* Sort the recorded _mcount call addresses. */
	sort(start, nr_locs, sizeof(*start), ftrace_cmp_ips, NULL);

	first_pg = ftrace_allocate_pages(nr_locs);
	if (first_pg == NULL)
		return -ENOMEM;

	mutex_lock(&ftrace_lock);

	if (mod) {
		if (!ftrace_pages)
			goto out;

		/* Walk to the tail if the current page unexpectedly has successors. */
		if (WARN_ON(ftrace_pages->next)) {
			while (ftrace_pages->next)
				ftrace_pages = ftrace_pages->next;
		}

		/* Append the new pages to the singly linked list. */
		ftrace_pages->next = first_pg;
	} else {
		WARN_ON(ftrace_pages || ftrace_pages_start);
		ftrace_pages = ftrace_pages_start = first_pg;
	}

	cur_pg = first_pg;
	for (loc = start; loc < end; loc++) {
		ip = ftrace_call_adjust(*loc);
		if (!ip)
			continue;

		/* Current page is full: continue in the next ftrace_page. */
		if (cur_pg->index == cur_pg->size) {
			if (WARN_ON(!cur_pg->next))
				break;
			cur_pg = cur_pg->next;
		}

		/* Records are filled from index 0 upwards; store the call-site address. */
		rec = &cur_pg->records[cur_pg->index++];
		rec->ip = ip;
	}

	WARN_ON(cur_pg->next);
	ftrace_pages = cur_pg;

	/* Patch the recorded call sites; for the core kernel this runs with IRQs off. */
	if (!mod)
		local_irq_save(irq_flags);
	ftrace_update_code(mod, first_pg);
	if (!mod)
		local_irq_restore(irq_flags);
	ret = 0;
out:
	mutex_unlock(&ftrace_lock);
	return ret;
}
/*
 * ftrace_allocate_pages - allocate a chain of ftrace_page structures.
 * @num_to_init: number of dyn_ftrace records the chain must hold
 *
 * Returns the head of the chain, or NULL when @num_to_init is zero or an
 * allocation fails (everything allocated so far is freed in that case).
 */
static struct ftrace_page *ftrace_allocate_pages(unsigned long num_to_init)
{
	struct ftrace_page *start_pg;
	struct ftrace_page *pg;
	int order;
	int cnt;

	if (!num_to_init)
		return NULL;

	start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL);
	if (!pg)
		return NULL;

	for (;;) {
		/*
		 * Attach a block of contiguous free pages to pg; cnt is the
		 * number of records it covers, with cnt <= num_to_init.
		 */
		cnt = ftrace_allocate_records(pg, num_to_init);
		if (cnt < 0)
			goto free_pages;

		num_to_init -= cnt;
		if (!num_to_init)
			break;

		/* Records remain: chain another ftrace_page to hold them. */
		pg->next = kzalloc(sizeof(*pg), GFP_KERNEL);
		if (!pg->next)
			goto free_pages;

		pg = pg->next;
	}

	return start_pg;

free_pages:
	/* Release every record block and descriptor allocated so far. */
	pg = start_pg;
	while (pg) {
		order = get_count_order(pg->size / ENTRIES_PER_PAGE);
		free_pages((unsigned long)pg->records, order);
		start_pg = pg->next;
		kfree(pg);
		pg = start_pg;
	}
	pr_info("ftrace: FAILED to allocate memory for functions\n");
	return NULL;
}
/*
 * ftrace_allocate_records - allocate the record array of one ftrace_page.
 * @pg:    page descriptor whose records and size fields are filled in
 * @count: number of records still needed
 *
 * Returns the number of records this page covers (at most @count),
 * -EINVAL when @count is zero, or -ENOMEM when no memory could be obtained.
 */
static int ftrace_allocate_records(struct ftrace_page *pg, int count)
{
	int order;
	int capacity;

	if (WARN_ON(!count))
		return -EINVAL;

	/* Page order for the number of pages needed to hold count entries. */
	order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE));

	/* Lower the order while the block would exceed count by ENTRIES_PER_PAGE entries or more. */
	while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE)
		order--;

	/* Allocate zeroed pages; on failure retry with the order halved, giving up at order 0. */
	for (;;) {
		pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
		if (pg->records)
			break;
		if (!order)
			return -ENOMEM;
		order >>= 1;
	}

	capacity = (PAGE_SIZE << order) / ENTRY_SIZE;
	pg->size = capacity;

	return capacity > count ? count : capacity;
}
//ftrace_call_adjust - map a recorded call-site address to the address stored in the dyn_ftrace record
//This implementation returns addr unchanged.
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
return addr;
}
/*
 * ftrace_update_code - convert newly recorded call sites to nops.
 * @mod:     owning module, or NULL for the core kernel
 * @new_pgs: head of the ftrace_page chain to process
 *
 * Returns 0, or -1 if ftrace was found disabled while iterating.
 */
static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
{
	struct ftrace_page *pg;
	struct dyn_ftrace *p;
	u64 start, stop;
	unsigned long update_cnt = 0;
	unsigned long rec_flags = 0;
	int i;

	start = ftrace_now(raw_smp_processor_id());

	/* Records that belong to a module start out flagged as disabled. */
	if (mod)
		rec_flags |= FTRACE_FL_DISABLED;

	/* Visit every ftrace_page in the chain ... */
	for (pg = new_pgs; pg; pg = pg->next) {
		/* ... and every dyn_ftrace record filled in on that page. */
		for (i = 0; i < pg->index; i++) {
			if (unlikely(ftrace_disabled))
				return -1;

			p = &pg->records[i];
			p->flags = rec_flags;

			/*
			 * Turn the instruction that calls _mcount() into a
			 * nop; on failure stop processing this page.
			 */
			if (!__is_defined(CC_USING_NOP_MCOUNT) && !ftrace_code_disable(mod, p))
				break;

			update_cnt++;
		}
	}

	stop = ftrace_now(raw_smp_processor_id());
	ftrace_update_time = stop - start;
	ftrace_update_tot_cnt += update_cnt;

	return 0;
}
/*
 * ftrace_code_disable - replace the _mcount call of one record with a nop.
 *
 * Returns 1 on success, 0 if ftrace is disabled or the replacement failed
 * (the failure is reported through ftrace_bug()).
 */
static int ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
{
	int err;

	if (unlikely(ftrace_disabled))
		return 0;

	/* Replace the instruction that calls _mcount with a nop. */
	err = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
	if (!err)
		return 1;

	ftrace_bug_type = FTRACE_BUG_INIT;
	ftrace_bug(err, rec);
	return 0;
}
/*
 * ftrace_make_nop - overwrite the call site described by @rec.
 *
 * The replacement depends on where the call site lives: INSN_NOP when
 * core_kernel_text() accepts the address, INSN_B_1F otherwise (i.e. for
 * addresses outside the kernel image, such as modules). @mod and @addr are
 * not used here.
 *
 * Returns the result of the code-modification helper.
 */
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long site = rec->ip;
	unsigned int insn;

	if (core_kernel_text(site))
		insn = INSN_NOP;
	else
		insn = INSN_B_1F;

#ifdef CONFIG_64BIT
	return ftrace_modify_code(site, insn);
#else
	return ftrace_modify_code_2(site, insn, INSN_NOP);
#endif
}
/*
 * core_kernel_text - test whether addr lies in the kernel's own text.
 *
 * Returns 1 when addr is within [_stext, _etext), or, while system_state
 * is still below SYSTEM_RUNNING, within the init text; 0 otherwise.
 */
int notrace core_kernel_text(unsigned long addr)
{
	if (addr >= (unsigned long)_stext && addr < (unsigned long)_etext)
		return 1;

	if (system_state < SYSTEM_RUNNING && init_kernel_text(addr))
		return 1;

	return 0;
}
/*
 * init_kernel_text - test whether addr lies within [_sinittext, _einittext).
 *
 * Returns 1 if it does, 0 otherwise.
 */
int init_kernel_text(unsigned long addr)
{
	unsigned long init_start = (unsigned long)_sinittext;
	unsigned long init_end = (unsigned long)_einittext;

	return addr >= init_start && addr < init_end;
}
//利用ftrace设置启动阶段时的过滤点
/*
//set_ftrace_early_filters - apply the filters supplied at boot
static void __init set_ftrace_early_filters(void)
{
//Only acts when ftrace_filter_buf is non-empty
if (ftrace_filter_buf[0])
ftrace_set_early_filter(&global_ops, ftrace_filter_buf, 1);
...
}
//Buffer that holds the value of the ftrace_filter= boot option
static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;
//set_ftrace_filter - handler for ftrace_filter=: sets ftrace_filter_param and copies str into ftrace_filter_buf (bounded by FTRACE_FILTER_SIZE); returns 1
static int __init set_ftrace_filter(char *str)
{
ftrace_filter_param = true;
strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
return 1;
}
//Registers set_ftrace_filter for the "ftrace_filter=" option
__setup("ftrace_filter=", set_ftrace_filter);
*/
//该类过滤点需要在启动命令行中设置,否则为空