关于ftrace提供的接口应用如下所示:
static struct ftrace_ops kpatch_ftrace_ops __read_mostly = {
.func = kpatch_ftrace_handler,
.flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
};
ftrace_set_filter_ip(&kpatch_ftrace_ops, ip, 0, 0);
register_ftrace_function(&kpatch_ftrace_ops);
//ftrace操作结构体
struct ftrace_ops {
ftrace_func_t func;
struct ftrace_ops __rcu *next;
unsigned long flags;
void *private;
ftrace_func_t saved_func;
#ifdef CONFIG_DYNAMIC_FTRACE
struct ftrace_ops_hash local_hash;
struct ftrace_ops_hash *func_hash;
struct ftrace_ops_hash old_hash;
unsigned long trampoline;
unsigned long trampoline_size;
struct list_head list;
#endif
}
enum {
FTRACE_OPS_FL_ENABLED = 1 << 0,
FTRACE_OPS_FL_GLOBAL = 1 << 1,
FTRACE_OPS_FL_DYNAMIC = 1 << 2,
FTRACE_OPS_FL_CONTROL = 1 << 3,
FTRACE_OPS_FL_SAVE_REGS = 1 << 4,
FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 5,
FTRACE_OPS_FL_RECURSION_SAFE = 1 << 6,
FTRACE_OPS_FL_STUB = 1 << 7,
FTRACE_OPS_FL_INITIALIZED = 1 << 8,
FTRACE_OPS_FL_IPMODIFY = 1 << 9,
};
//ftrace_ops哈希链结构
struct ftrace_ops_hash {
struct ftrace_hash __rcu *notrace_hash;
struct ftrace_hash __rcu *filter_hash;
struct mutex regex_lock;
};
//ftrace哈希链结构
struct ftrace_hash {
unsigned long size_bits;
struct hlist_head *buckets;//哈希桶
unsigned long count;
unsigned long flags;
struct rcu_head rcu;
};
//ftrace设置过滤地址
int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, int remove, int reset)
{
//初始ftrace_ops结构体对象
ftrace_ops_init(ops);
//将所要更改指令的ip地址与ftrace_ops结构体对象关联
return ftrace_set_addr(ops, ip, remove, reset, 1);
}
static inline void ftrace_ops_init(struct ftrace_ops *ops)
{
//打开ftrace动态跟踪设置
#ifdef CONFIG_DYNAMIC_FTRACE
if (!(ops->flags & FTRACE_OPS_FL_INITALIZED)) {
mutex_init(&ops->local_hash.regex_lock);
ops->func_hash = &ops->local_hash;
ops->flags |= FTRACE_OPS_FL_INITIALIZED;
//设置哈希链以及标志位
}
#endif
}
static int ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove, int reset, int enable)
{
//ip和ftrace_ops的关联主要通过结构体中的哈希链结构来实现
return ftrace_set_hash(ops, NULL, 0, ip, remove, reset, enable);
}
static int ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, unsigned long ip, int remove, int reset, int enable)
{
struct ftrace_hash **orig_hash;
struct ftrace_hash *hash;
int ret;
if (unlikely(ftrace_disabled))
return -ENODEV;
mutex_lock(&ops->func_hash->regex_lock);
if (enable)
//如果使能
orig_hash = &ops->func_hash->filter_hash;
//设置哈希链结构,注意这里为二级指针,‘&’优先级低于’->‘
//不采用一级指针的原因是因为filter_hash为一级指针且值为NULL,因此无法直接赋值给其他一级指针。但是filter_hash指针自身的地址存在,可将其赋值给二级指针,这样*(struct ftrace_hash **)orig_hash便可以取到filter_hash中的地址值,但此时*orig_hash为NULL。
else
orig_hash = &ops->func_hash->notrace_hash;
if (reset)
hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
else
hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
//创建哈希链
//#define FTRACE_HASH_DEFAULT_BITS 10
if (!hash) {
ret = -ENOMEM;
goto out_regex_unlock;
}
if (buf && !ftrace_match_records(hash, buf, len)) {
ret = -EINVAL;
goto out_regex_unlock;
}
if (ip) {
//ip存在,执行该分支
ret = ftrace_match_addr(hash, ip, remove);
//查看该地址是否存在
if (ret < 0)
goto out_regex_unlock;
}
mutex_lock(&ftrace_lock);
ret = ftrace_hash_move_and_update_ops(ops, orig_hash, hash, enable);
//将orig_hash与hash相关联,即ops->func_hash->filter_hash=hash
mutex_unlock(&ftrace_lock);
out_regex_unlock:
mutex_unlock(&ops->func_hash->regex_lock);
free_ftrace_hash(hash);
return ret;
}
static struct ftrace_hash *alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
{
struct ftrace_func_entry *entry;
struct ftrace_hash *new_hash;
int size;
int ret;
int i;
new_hash = alloc_ftrace_hash(size_bits);
//创建ftrace_hash结构体对象,并对其进行初始
if (!new_hash)
return NULL;
if (hash)
new_hash->flags = hash->flags;
if (ftrace_hash_empty(hash))
//hash为NULL,直接返回创建的ftrace_hash结构体对象
return new_hash;
size = 1 << hash->size_bits;
for (i = 0; i < size; i++) {
hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
ret = add_hash_entry(new_hash, entry->ip);
if (ret < 0)
goto free_hash;
}
}
FTRACE_WARN_ON(new_hash->count != hash->count);
return new_hash;
free_hash:
free_ftrace_hash(new_hash);
return NULL;
}
static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
{
struct ftrace_hash *hash;
int size;
hash = kzalloc(sizeof(*hash), GFP_KERNEL);
//创建ftrace_hash结构体对象
if (!hash)
return NULL;
size = 1 << size_bits;
hash->buckets = kcalloc(size, sizeof(*hash->buckets), GFP_KERNEL);
//初始ftrace_hash结构体中的hlist_head结构体对象,即分配size个哈希桶结构体对象
if (!hash->buckets) {
kfree(hash);
return NULL;
}
hash->size_bits = size_bits;
return hash;
}
static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
{
return !hash || !(hash->count || (hash->flags & FTRACE_HASH_FL_MOD));
}
static int ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
{
struct ftrace_func_entry *entry;
if (!ftrace_location(ip))
//检查ip地址是否真实存在
return -EINVAL;
if (remove) {
entry = ftrace_lookup_ip(hash, ip);
if (!entry)
return -ENOENT;
free_hash_entry(hash, entry);
return 0;
}
return add_hash_entry(hash, ip);
//如果ip存在,则将其插入哈希链中
}
/*
unsigned long ftrace_location(unsigned long ip)
{
return ftrace_location_range(ip, ip);
}
struct ftrace_page {
struct ftrace_page *next;
struct dyn_ftrace *records;
int index;
int size;
};
//ftrace动态跟踪结构体
struct dyn_ftrace {
unsigned long ip;
unsigned long flags;
struct dyn_arch_ftrace arch;
};
//该结构体与架构相关,MIPS/X86等架构中为空
struct dyn_arch_ftrace {
};
unsigned long ftrace_location_range(unsigned long start, unsigned long end)
{
struct dyn_ftrace *rec;
rec = lookup_rec(start, end);
if (rec)
return rec->ip;
return 0;
}
//查找rec
static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end)
{
struct ftrace_page *pg;
struct dyn_ftrace *rec = NULL;
struct dyn_ftrace key;
key.ip = start;
key.flags = end;
for (pg = ftrace_pages_start; pg; pg = pg->next) {
//遍历ftrace中所有的ftrace_page
if (end < pg->records[0].ip || start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE))
continue;
rec = bsearch(&key, pg->records, pg->index, sizeof(struct dyn_ftrace), ftrace_cmp_recs);
if (rec)
break;
}
return rec;
}
void *bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp)
{
return __inline_bsearch(key, base, num, size, cmp);
}
//二进制搜索
static __always_inline void *__inline_bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp)
{
const char *pivot;
//地址
int result;
while (num > 0) {
pivot = base + (num >> 1) * size;
//获取dyn_ftrace结构体对象
result = cmp(key, pivot);
//该函数为回调函数
if (result == 0)
return (void *)pivot;
if (result > 0) {
base = pivot + size;
num--;
}
num >>= 1;
}
return NULL;
}
//dyn_ftrace结构体比较
static int ftrace_cmp_recs(const void *a, const void *b)
{
const struct dyn_ftrace *key = a;
const struct dyn_ftrace *rec = b;
if (key->flags < rec->ip)
return -1;
if (key->ip >= rec->ip + MCOUNT_INSN_SIZE)
//MCOUNT_INSN_SIZE为mcount函数所对应的汇编指令以二进制形式所占据的字节大小
return 1;
return 0;
}
//根据代码,可知所要查找的ip地址与dyn_ftrace所指向的地址关系为:
// ip | dyn_ftrace,该情况为错误
// ip|dyn_ftrace,该情况为两者为同一地址
// dyn_ftrace | dyn_ftrace|ip,该情况下,ip在下一个dyn_ftrace位置处
*/
static int add_hash_entry(struct ftrace_hash *hash, unsigned long ip)
{
struct ftrace_func_entry *entry;
/*
struct ftrace_func_entry {
struct hlist_node hlist;
unsigned long ip;
unsigned long direct;
};
*/
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
//创建ftrace_func_entry结构体对象
if (!entry)
return -ENOMEM;
entry->ip = ip;
__add_hash_entry(hash, entry);
//添加到哈希链中
return 0;
}
static void __add_hash_entry(struct ftrace_hash *hash, struct ftrace_func_entry *entry)
{
struct hlist_head *hhd;
unsigned long key;
key = ftrace_hash_key(hash, entry->ip);
//计算哈希索引
hhd = &hash->buckets[key];
hlist_add_head(&entry->hlist, hhd);
hash->count++;
}
/*
static int ftrace_match_records(struct ftrace_hash *hash, char *buff, int len)
{
return match_records(hash, buff, len, NULL);
}
static int match_records(struct ftrace_hash *hash, char *func, int len, char *mod)
{
struct ftrace_page *pg;
struct dyn_ftrace *rec;
struct ftrace_glob func_g = { .type = MATCH_FULL };
struct ftrace_glob mod_g = { .type = MATCH_FULL };
struct ftrace_glob *mod_match = (mod) ? &mod_g : NULL;
//struct ftrace_glob {
// char *search;
// unsigned len;
// int type;
//};
int exclude_mod = 0;
int found = 0;
int ret;
int clear_filter = 0;
if (func) {
func_g.type = filter_parse_regex(func, len, &func_g.search, &clear_filter);
func_g.len = strlen(func_g.search);
}
if (mod) {
mod_g.type = filter_parse_regex(mod, strlen(mod), &mod_g_search, &exclude_mod);
mod_g.len = strlen(mod_g.search)
}
mutex_lock(&ftrace_lock);
if (unlikely(ftrace_disabled))
goto out_unlock;
if (func_g.type == MATCH_INDEX) {
found = add_rec_by_index(hash, &func_g, clear_filter);
goto out_unlock;
}
do_for_each_ftrace_rec(pg, rec) {
if (rec->flags & FTRACE_FL_DISABLED)
continue;
if (ftrace_match_record(rec, &func_g, mod_match, exclude_mod)) {
ret = enter_record(hash, rec, clear_filter);
if (ret < 0) {
found = ret;
goto out_unlock;
}
found = 1;
}
} while_for_each_ftrace_rec();
out_unlock:
mutex_unlock(&ftrace_lock);
return found;
}
*/
static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, struct ftrace_hash **orig_hash, struct ftrace_hash *hash, int enable)
{
struct ftrace_ops_hash old_hash_ops;
struct ftrace_hash *old_hash;
int ret;
old_hash = *orig_hash;
old_hash_ops.filter_hash = ops->func_hash->filter_hash;
old_hash_ops.notrace_hash = ops->func_hash->notrace_hash;
ret = ftrace_hash_move(ops, enable, orig_hash, hash);
if (!ret) {
ftrae_ops_update_code(ops, &old_hash_ops);
free_ftrace_hash_rcu(old_hash);
}
return ret;
}
/*
static int ftrace_hash_move(struct ftrace_ops *ops, int enable, struct ftrace_hash **dst, struct ftrace_hash *src)
{
struct ftrace_hash *new_hash;
int ret;
if (ops->flags & FTRACE_OPS_FL_IPMODIFY && !enable)
return -EINVAL;
new_hash = __ftrace_hash_move(src);
if (!new_hash)
return -ENOMEM;
if (enable) {
ret = ftrace_hash_ipmodify_update(ops, new_hash);
if (ret < 0) {
free_ftrace_hash(new_hash);
return ret;
}
}
ftrace_hash_rec_disable_modify(ops, enable);
rcu_assign_pointer(*dst, new_hash);
ftrace_hash_rec_enable_modify(ops, enable);
}
static struct ftrace_hash *__ftrace_hash_move(struct ftrace_hash *src)
{
struct ftrace_func_entry *entry;
struct hlist_node *tn;
struct hlist_head *hhd;
struct ftrace_hash *new_hash;
int size = src->count;
int bits = 0;
int i;
if (ftrace_hash_empty(src))
return EMPTY_HASH;
for (size /= 2; size; size >>= 1)
bits++;
if (bits > FTRACE_HASH_MAX_BITS)
bits =FTRACE_HASH_MAX_BITS;
new_hash = alloc_ftrace_hash(bits);
if (!new_hash)
return NULL;
new_hash->flags = src->flags;
size = 1 << src->size_bits;
for (i = 0; i < size; i++) {
hhd = &src->buckets[i];
hlist_for_each_entry_safe(entry, tn, hhd, hlist) {
remove_hash_entry(src, entry);
__add_hash_entry(new_hash, entry);
}
}
return new_hash;
}
*/
//注册ftrace_ops结构体
int register_ftrace_function(struct ftrace_ops *ops)
{
int ret = -1;
ftrace_ops_init(ops);
//之前已置位,不再执行
mutex_lock(&ftrace_lock);
ret = ftrace_startup(ops, 0);
//启动ftrace
mutex_unlock(&ftrace_lock);
return ret;
}
int ftrace_startup(struct ftrace_ops *ops, int command)
{
int ret;
if (unlikely(ftrace_disabled))
return -ENODEV;
ret = __register_ftrace_function(ops);
//注册ftrace_ops结构体对象
if (ret)
return ret;
ftrace_start_up++;
ops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_ADDING;
ret = ftrace_hash_ipmodify_enable(ops);
if (ret < 0) {
__unregister_ftrace_function(ops);
ftrace_start_up--;
ops->flags &= ~FTRACE_OPS_FL_ENABLED;
if (ops->flags & FTRACE_OPS_FL_DYNAMIC)
ftrace_trampoline_free(ops);
return ret;
}
if (ftrace_hash_rec_enable(ops, 1))
command |= FTRACE_UPDATE_CALLS;
ftrace_startup_enable(command);
//使能ftrace
ops->flags &= ~FTRACE_OPS_FL_ADDING;
return 0;
}
static int __register_ftrace_function(struct ftrace_ops *ops)
{
if (ops->flags & FTRACE_OPS_FL_DELETED)
return -EINVAL;
if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))
return -EBUSY;
#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS && !(ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED))
return -EINVAL;
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED)
ops->flags |= FTRACE_OPS_FL_SAVE_REGS;
#endif
if (!core_kernel_data((unsigned long)ops))
ops->flags |= FTRACE_OPS_FL_DYNAMIC;
add_ftrace_ops(&ftrace_ops_list, ops);
// ftrace_ops添加到全局链表ftrace_ops_list中
ops->saved_func = ops->func;
if (ftrace_pids_enabled(ops))
ops->func = ftrace_pid_func;
ftrace_update_trampoline(ops);
if (ftrace_enabled)
update_ftrace_function();
return 0;
}
static void ftrace_startup_enable(int command)
{
if (saved_ftrace_func != ftrace_trace_function) {
saved_ftrace_func = ftrace_trace_function;
command |= FTRACE_UPDATE_TRACE_FUNC;
}
if (!command || !ftrace_enabled)
return;
ftrace_run_update_code(command);
}
static void ftrace_run_update_code(int command)
{
int ret;
ret = ftrace_arch_code_modify_prepare();
FTRACE_WARN_ON(ret);
if (ret)
return;
arch_ftrace_update_code(command);
ret = ftrace_arch_code_modify_post_process();
FTRACE_WARN_ON(ret);
}
void __weak arch_ftrace_update_code(int command)
{
ftrace_modify_all_code(command);
}
void ftrace_modify_all_code(int command)
{
int update = command & FTRACE_UPDATE_TRACE_FUNC;
int err = 0;
if (update) {
err = ftrace_update_ftrace_func(ftrace_ops_list_func);
//将ftrace_call()函数入口的第一条的指令替换为jal ftrace_ops_list_func
//注意:是ftrace_call()函数,该函数被ftrace_caller()调用
if (FTRACE_WARN_ON(err))
return;
}
if (command & FTRACE_UPDATE_CALLS)
ftrace_replace_code(1);
//将调用ftrace_caller()函数的指令由nop指令改为jal ftrace_caller+8
else if (command & FTRACE_DISABLE_CALLS)
ftrace_replace_code(0);
if (update && ftrace_trace_function != ftrace_ops_list_func) {
function_trace_op = set_function_trace_op;
smp_wmb();
if (!irqs_disabled())
smp_call_function(ftrace_sync_ipi, NULL, 1);
err = ftrace_update_ftrace_func(ftrace_trace_function);
if (FTRACE_WARN_ON(err))
return;
}
if (command & FTRACE_START_FUNC_RET)
err = ftrace_enable_ftrace_graph_caller();
else if (command & FTRACE_STOP_FUNC_RET)
err = ftrace_disable_ftrace_graph_caller();
FTRACE_WARN_ON(err);
}
#define JAL 0x0c000000
#define ADDR_MASK 0x03ffffff
#define INSN_JAL(addr) ((unsigned long)(JAL | (((addr) >> 2)& ADDR_MASK)))
#define FTRACE_CALL_IP ((unsigned long)(&ftrace_call))
int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned int new;
new = INSN_JAL((unsigned long)func);
//假设:&func = 0x802e7fc8
//new = 0x0c000000 | ((0x802e7fc8 >> 2) & 0x03ffffff)
//new = 0x0c000000 | (0x400b9ff2 & 0x03ffffff)
//new = 0x0c000000 | 0x000b9ff2
//new = 0x0c0b9ff2
return ftrace_modify_code(FTRACE_CALL_IP, new);
}
void __weak ftrace_replace_code(int enable)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
int failed;
if (unlikely(ftrace_disbaled))
return;
do_for_each_ftrace_rec(pg, rec) {
if (rec->flag & FTRACE_FL_DISABLED)
continue;
failed = __ftrace_replace_code(rec, enable);
if (failed) {
ftrace_bug(failed, rec);
return;
}
} while_for_each_ftrace_rec();
}
static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
{
unsigned long ftrace_old_addr;
unsigned long ftrace_addr;
int ret;
ftrace_addr = ftrace_get_addr_new(rec);
ftrae_old_addr = ftrace_get_addr_curr(rec);
ret = ftrace_update_record(rec, enable);
ftrace_bug_type = FTRACE_BUG_UNKNOWN;
switch (ret) {
case FTRACE_UPDATE_IGNORE:
return 0;
case FTRACE_UPDATE_MAKE_CALL:
ftrace_bug_type = FTRACE_BUG_CALL;
return ftrace_make_call(rec, ftrace_addr);
case FTRACE_UPDATE_MAKE_NOP:
ftrace_bug_type = FTRACE_BUG_NOP;
return ftrace_make_nop(NULL, rec, ftrace_old_addr);
case FTRACE_UPDATE_MODIFY_CALL:
ftrace_bug_type = FTRACE_BUG_UPDATE;
return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);
}
return -1;
}
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned int new;
unsigned long ip = rec->ip;
//判断ip地址的位置,来选则替换的指令,这是因为如果调用_mcount函数的地址与_mcount函数的位置超过256M,则指令不同
new = core_kernel_text(ip) ? insn_jal_ftrace_caller : insn_la_mcount[0];
#ifdef CONFIG_64BIT
return ftrace_modify_code(ip, new);
#else
return ftrace_modify_code_2r(ip, new, core_kernel_text(ip) ? INSN_NOP : insn_la_mcount[1]);
#endif
}
//通过对ftrace的分析,可知启动ftrace后,函数会执行下边的接口来执行trace所指向的函数
#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip)
{
__ftrace_ops_list_func(ip, parent_ip, NULL, NULL);
}
static nokprobe_inline void __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ignored, struct pt_regs *regs)
{
struct ftrace_ops *op;
int bit;
bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
if (bit < 0)
return 0;
preempt_disable_notrace();
do_for_each_ftrace_op(op, ftrace_ops_list) {
if ((!(op->flags & FTRACE_OPS_FL_RCU) || (rcu_is_watching())) && ftrace_ops_test(op, ip, regs)) {
if (FTRACE_WARN_ON(!op->func)) {
pr_warn("op=%p, %pS\n", op, op);
goto out;
}
op->func(ip, parent_ip, op, regs);
}
} while_for_each_ftrace_op(op);
out:
preempt_enable_notrace();
trace_clear_recursion(bit);
}
通过对上述ftrace执行流程的分析,可知,ftrace执行的整体过程如下:
gcc -pg
kernel_function() {
|
jal ftracel_caller() {
|
b ftrace_stub
|
mcount_save_regs
ftrace_init
kernel_function() {
|
nop() {
|
nop
|
mcount_save_regs
ftrace (ftrace_set_filter_ip,register_ftrace_function)
kernel_function() {
|
jal ftracel_caller() + 8 {
|
mcount_save_regs (address:ftrace_caller+8)
|
ftrace_call()
|
jal ftrace_ops_list_func
需要注意的是,模块编译的结果与内核文本段编译的结果存在差别,即:
lui v0, x0
daddiu v0, v0, x0
move AT, ra
jalr v0
nop
当注册内核模块时,会出现如下的调用顺序,
load_module
|
ftrace_module_init
|
ftrace_process_locs
|
ftrace_update_code
|
ftrace_code_disable
|
ftrace_make_nop
此时,指令会有所修改,变为:
b 4
daddiu v0, v0, x0
move AT, ra
jalr v0
nop
这些便是内核模块的区别。
以上是基于MIPS架构的分析,其他架构可能存在差异。