strace是通过ptrace来跟踪系统调用的。ptrace可以让一个进程跟踪和控制另一个进程的执行,正在执行追踪的进程称为tracer,被追踪的进程称为tracee。ptrace定义为long ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data),参数 request是请求类型,其中PTRACE_ATTACH用于建立tracer和tracee关系,PTRACE_SYSCALL用于跟踪pid对应tracee的系统调用。
1.建立跟踪关系
建立tracer与tracee的跟踪关系时,ptrace系统调用的参数request为PTRACE_ATTACH,pid为被跟踪进程(tracee)的pid,其余两个参数为0L。ptrace系统调用的内核代码先找到pid对应的task_struct,再调用ptrace_attach()建立跟踪关系。
//strace源码
/*
 * Attach to the process identified by pid.
 *
 * When seize support is compiled in (USE_SEIZE) and enabled (use_seize),
 * issue PTRACE_SEIZE and, if that succeeds, PTRACE_INTERRUPT; otherwise
 * fall back to a plain PTRACE_ATTACH.
 *
 * ptrace_attach_cmd is set to the name of the request whose result is
 * returned. The return value is that ptrace() call's result.
 */
static int ptrace_attach_or_seize(int pid)
{
#if USE_SEIZE
	if (use_seize) {
		int rc = ptrace(PTRACE_SEIZE, pid, 0L,
				(unsigned long) ptrace_setoptions);

		if (rc != 0) {
			/* The seize itself failed: report it as the failing request. */
			ptrace_attach_cmd = "PTRACE_SEIZE";
			return rc;
		}

		rc = ptrace(PTRACE_INTERRUPT, pid, 0L, 0L);
		ptrace_attach_cmd = "PTRACE_INTERRUPT";
		return rc;
	}
#endif
	/* Seize unavailable or disabled: classic attach. */
	ptrace_attach_cmd = "PTRACE_ATTACH";
	return ptrace(PTRACE_ATTACH, pid, 0L, 0L);
}
//ptrace内核源码
/*
 * ptrace() system call entry point -- excerpt showing the attach path.
 * Dotted lines mark code omitted from this listing.
 */
SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
unsigned long, data)
{
struct task_struct *child;
long ret;
....................................................
// Look up the task_struct that corresponds to pid
child = ptrace_get_task_struct(pid);
if (IS_ERR(child)) {
// Lookup failed: turn the error pointer into an error code and bail out
ret = PTR_ERR(child);
goto out;
}
// Attach to the task that is to be traced
if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) {
ret = ptrace_attach(child, request, addr, data);
/*
* Some architectures need to do book-keeping after
* a ptrace attach.
*/
if (!ret)
arch_ptrace_attach(child);
goto out_put_task_struct;
}
.........................................................
}
ptrace_attach()首先设置tracee对应task_struct的成员ptrace,这里为0,然后再调用ptrace_link()->__ptrace_link()建立tracer与tracee的关系。tracer与tracee结构体关系,简化如下图。
/*
 * Attach the current task to `task` as its tracer -- excerpt.
 * Dotted lines mark code omitted from this listing.
 *
 * request: PTRACE_SEIZE selects seize semantics (PT_SEIZED is added to flags).
 * flags:   value stored into task->ptrace.
 */
static int ptrace_attach(struct task_struct *task, long request,
unsigned long addr,
unsigned long flags)
{
bool seize = (request == PTRACE_SEIZE);
int retval;
........................................
// Store flags in the tracee's ptrace field.
// NOTE(review): the original annotation says flags is 0 at this point; the
// code that would establish that is elided from this excerpt -- confirm.
if (seize)
flags |= PT_SEIZED;
task->ptrace = flags;
// Establish the tracer/tracee relationship, with current as the tracer
ptrace_link(task, current);
......................................
}
/*
 * Link child to new_parent by calling __ptrace_link() with new_parent's
 * credentials. The credentials are read with __task_cred() and handed over
 * inside an RCU read-side section.
 */
static void ptrace_link(struct task_struct *child, struct task_struct *new_parent)
{
rcu_read_lock();
__ptrace_link(child, new_parent, __task_cred(new_parent));
rcu_read_unlock();
}
/*
 * Record new_parent as the tracer of child.
 *
 * child:        task being traced
 * new_parent:   task that becomes child's parent
 * ptracer_cred: credentials stored in child->ptracer_cred
 */
void __ptrace_link(struct task_struct *child, struct task_struct *new_parent,
const struct cred *ptracer_cred)
{
// child must not already be on a ptrace list
BUG_ON(!list_empty(&child->ptrace_entry));
// Put child on new_parent's list of traced tasks
list_add(&child->ptrace_entry, &new_parent->ptraced);
// new_parent takes over as child's parent
child->parent = new_parent;
// Remember the tracer's credentials (get_cred() presumably takes a
// reference -- confirm against its definition)
child->ptracer_cred = get_cred(ptracer_cred);
}
2.跟踪tracee的系统调用
跟踪tracee的系统调用时request为PTRACE_SYSCALL,pid为tracee的pid。ptrace系统调用内核代码先找到pid对应的task_struct,再调用芯片平台对应的arch_ptrace(),ARM64接着调用ptrace_request()。ptrace_request()根据请求类型的不同调用不同的函数处理,PTRACE_SYSCALL请求类型对应的是ptrace_resume()。
/*
 * ptrace() system call entry point -- excerpt showing the path taken by
 * requests that are dispatched to arch_ptrace().
 * Dotted lines mark code omitted from this listing.
 */
SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
unsigned long, data)
{
struct task_struct *child;
long ret;
.........................................................
// Look up the task_struct that corresponds to pid
child = ptrace_get_task_struct(pid);
if (IS_ERR(child)) {
// Lookup failed: turn the error pointer into an error code and bail out
ret = PTR_ERR(child);
goto out;
}
........................................................
// Dispatch to the architecture-specific arch_ptrace()
ret = arch_ptrace(child, request, addr, data);
// Unfreeze the tracee unless this was a PTRACE_DETACH that returned 0
if (ret || request != PTRACE_DETACH)
ptrace_unfreeze_traced(child);
.......................................................
}
/*
 * Architecture hook for ptrace requests. This implementation adds nothing
 * of its own and forwards every request to ptrace_request(), returning its
 * result.
 */
long arch_ptrace(struct task_struct *child, long request,
unsigned long addr, unsigned long data)
{
return ptrace_request(child, request, addr, data);
}
/*
 * Dispatch a ptrace request for `child` -- excerpt showing only the
 * resume-type requests. Dotted lines mark code omitted from this listing.
 *
 * Returns the handler's result; ret starts out as -EIO.
 */
int ptrace_request(struct task_struct *child, long request,
unsigned long addr, unsigned long data)
{
bool seized = child->ptrace & PT_SEIZED;
int ret = -EIO;
siginfo_t siginfo, *si;
// Views of `data` as user-space pointers
void __user *datavp = (void __user *) data;
unsigned long __user *datalp = datavp;
unsigned long flags;
switch (request) {
..................................................
// Every request below is handled by ptrace_resume(); the #ifdef'd ones
// exist only where the corresponding request macro is defined.
#ifdef PTRACE_SINGLESTEP
case PTRACE_SINGLESTEP:
#endif
#ifdef PTRACE_SINGLEBLOCK
case PTRACE_SINGLEBLOCK:
#endif
#ifdef PTRACE_SYSEMU
case PTRACE_SYSEMU:
case PTRACE_SYSEMU_SINGLESTEP:
#endif
case PTRACE_SYSCALL:
case PTRACE_CONT:
return ptrace_resume(child, request, data);
................................................
}
return ret;
}
ptrace_resume()判断request是否为PTRACE_SYSCALL:如果是,就置位tracee对应thread_info的flags中的TIF_SYSCALL_TRACE位;如果不是,则复位该位。
/*
 * Handle a resume-type ptrace request for `child` -- excerpt.
 * Dotted lines mark code omitted from this listing.
 *
 * Returns -EIO when valid_signal(data) fails. Otherwise TIF_SYSCALL_TRACE
 * is set on the child for PTRACE_SYSCALL and cleared for any other request.
 */
static int ptrace_resume(struct task_struct *child, long request, unsigned long data)
{
bool need_siglock;
if (!valid_signal(data))
return -EIO;
if (request == PTRACE_SYSCALL)
set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); // set the flag
else
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); // clear the flag
.......................................................
}
/*
 * Set thread flag `flag` for task `tsk` by applying set_ti_thread_flag() to
 * the thread_info obtained from task_thread_info(tsk).
 */
static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
set_ti_thread_flag(task_thread_info(tsk), flag);
}
/*
 * Clear thread flag `flag` for task `tsk` by applying clear_ti_thread_flag()
 * to the thread_info obtained from task_thread_info(tsk).
 */
static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{
clear_ti_thread_flag(task_thread_info(tsk), flag);
}
3.tracee系统调用时通知tracer
tracee在进入系统调用的时候会调用函数syscall_trace_enter(),退出系统调用的时候调用syscall_trace_exit(),这两个函数最终都会调用ptrace_report_syscall()通知tracer。
// Traced system call path: report syscall entry, run the syscall (unless it
// is skipped or its number is out of range), report syscall exit, then
// return to user space.
__sys_trace:
mov w0, #-1 // set default errno for
cmp scno, x0 // user-issued syscall(-1)
b.ne 1f
mov x0, #-ENOSYS
str x0, [sp, #S_X0]
// Call syscall_trace_enter() with sp as its argument
1: mov x0, sp
bl syscall_trace_enter
cmp w0, #-1 // skip the syscall?
b.eq __sys_trace_return_skipped
uxtw scno, w0 // syscall number (possibly new)
mov x1, sp // pointer to regs
cmp scno, sc_nr // check upper syscall limit
b.hs __ni_sys_trace
ldp x0, x1, [sp] // restore the syscall args
ldp x2, x3, [sp, #S_X2]
ldp x4, x5, [sp, #S_X4]
ldp x6, x7, [sp, #S_X6]
ldr x16, [stbl, scno, lsl #3] // address in the syscall table
blr x16 // call sys_* routine
__sys_trace_return:
str x0, [sp, #S_X0] // save returned x0
// Skipped syscalls join here, bypassing the store of x0 above
__sys_trace_return_skipped:
// Call syscall_trace_exit() with sp as its argument
mov x0, sp
bl syscall_trace_exit
b ret_to_user
/*
 * Syscall-entry hook.
 *
 * Runs, in this order: the ptrace entry report (if TIF_SYSCALL_TRACE is
 * set), secure_computing(), the sys_enter tracepoint (if
 * TIF_SYSCALL_TRACEPOINT is set), and audit_syscall_entry().
 *
 * Returns -1 if secure_computing() returns -1, otherwise regs->syscallno.
 */
asmlinkage int syscall_trace_enter(struct pt_regs *regs)
{
// Report the syscall entry via tracehook_report_syscall() when syscall
// tracing is enabled
if (test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
/* Do the secure computing after ptrace; failures should be fast. */
if (secure_computing(NULL) == -1)
return -1;
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
trace_sys_enter(regs, regs->syscallno);
audit_syscall_entry(regs->syscallno, regs->orig_x0, regs->regs[1],
regs->regs[2], regs->regs[3]);
// syscallno is read again here, after the hooks above have run
return regs->syscallno;
}
/*
 * Syscall-exit hook.
 *
 * Runs, in this order: audit_syscall_exit(), the sys_exit tracepoint (if
 * TIF_SYSCALL_TRACEPOINT is set), and the ptrace exit report (if
 * TIF_SYSCALL_TRACE is set).
 */
asmlinkage void syscall_trace_exit(struct pt_regs *regs)
{
audit_syscall_exit(regs);
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
trace_sys_exit(regs, regs_return_value(regs));
// Report the syscall exit via tracehook_report_syscall() when syscall
// tracing is enabled
if (test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
}
/*
 * Report a syscall entry or exit (selected by dir) through the tracehook
 * layer.
 *
 * While the report is made, one general-purpose register slot in regs
 * (index 12 for compat tasks, 7 otherwise) temporarily holds dir; its
 * previous value is restored before returning.
 *
 * On the entry path, a non-zero result from
 * tracehook_report_syscall_entry() sets regs->syscallno to ~0UL.
 */
static void tracehook_report_syscall(struct pt_regs *regs,
				     enum ptrace_syscall_dir dir)
{
	int scratch_idx;
	unsigned long scratch_saved;

	if (is_compat_task())
		scratch_idx = 12;
	else
		scratch_idx = 7;

	/* Stash the register, then expose the direction in its place. */
	scratch_saved = regs->regs[scratch_idx];
	regs->regs[scratch_idx] = dir;

	if (dir != PTRACE_SYSCALL_EXIT) {
		if (tracehook_report_syscall_entry(regs))
			regs->syscallno = ~0UL;
	} else {
		tracehook_report_syscall_exit(regs, 0);
	}

	/* Put the original register value back. */
	regs->regs[scratch_idx] = scratch_saved;
}
/*
 * Report a syscall exit.
 *
 * With step == 0, the report is made through ptrace_report_syscall().
 * With a non-zero step, a siginfo is filled in by
 * user_single_step_siginfo() and SIGTRAP is forced on the current task
 * instead; ptrace_report_syscall() is not called in that case.
 */
static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step)
{
	siginfo_t info;

	if (!step) {
		ptrace_report_syscall(regs);
		return;
	}

	user_single_step_siginfo(current, regs, &info);
	force_sig_info(SIGTRAP, &info, current);
}
4.总结
strace是通过ptrace来跟踪系统调用的,流程为:以PTRACE_ATTACH为参数调用ptrace,建立跟踪关系;以PTRACE_SYSCALL为参数调用ptrace,指明跟踪系统调用;被跟踪进程进出系统调用时,通知跟踪进程。