一、task_struct的遍历
进程,线程和内核线程的基本属性都是由struct task_struct结构体定义的。
设备上运行的所有task都通过双向链表连成一串,其中第一个task就是init_task,链表的最后一个task又指回init_task。因此可以从首个task(init_task)出发,沿双向链表(tasks)逐个访问,直到重新回到init_task为止,从而遍历所有的task。
首进程init_task在内核启动的时候静态赋值创建(见内核源码 init/init_task.c)。
/*
 * Statically initialized task_struct for the first task, init_task.
 * This listing is an abridged excerpt: the "......" lines stand for
 * fields that were left out.
 */
struct task_struct init_task
= {
.state = 0,
.stack = init_stack,
.usage = REFCOUNT_INIT(2),
.flags = PF_KTHREAD,
/* All three priority fields start from the same value. */
.prio = MAX_PRIO - 20,
.static_prio = MAX_PRIO - 20,
.normal_prio = MAX_PRIO - 20,
.policy = SCHED_NORMAL,
/* cpus_ptr points at this task's own cpus_mask. */
.cpus_ptr = &init_task.cpus_mask,
.cpus_mask = CPU_MASK_ALL,
.nr_cpus_allowed = NR_CPUS,
/* No mm of its own; active_mm refers to init_mm. */
.mm = NULL,
.active_mm = &init_mm,
/*
 * List node initialized on itself. This is the `tasks` list that is
 * walked, starting from init_task, to visit every task.
 */
.tasks = LIST_HEAD_INIT(init_task.tasks),
......
.signal = &init_signals,
.sighand = &init_sighand,
.nsproxy = &init_nsproxy,
/* Pending-signal state: list initialized on itself, signal set zeroed. */
.pending = {
.list = LIST_HEAD_INIT(init_task.pending.list),
.signal = {{0}}
},
.blocked = {{0}},
.alloc_lock = __SPIN_LOCK_UNLOCKED(init_task.alloc_lock),
.journal_info = NULL,
INIT_CPU_TIMERS(init_task)
.pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
.timer_slack_ns = 50000, /* 50 usec default slack */
.thread_pid = &init_struct_pid,
/* List node initialized on itself; used to chain the threads of a process. */
.thread_group = LIST_HEAD_INIT(init_task.thread_group),
/* Note: initialized against init_signals.thread_head, not a member of init_task. */
.thread_node = LIST_HEAD_INIT(init_signals.thread_head),
......
};
/* Export the symbol so that it can be looked up (e.g. with nm on vmlinux). */
EXPORT_SYMBOL(init_task);
可以在编译生成的linux镜像(vmlinux)的符号表中查找init_task符号:
nm vmlinux | grep init_task
然而一个进程中往往有很多线程。线程与进程类似,也通过双向链表串联,该链表挂在进程task_struct的thread_group成员上。
遍历系统中所有的进程和进程中所有的线程。
其中打印堆栈的方法:https://blog.csdn.net/sydyh43/article/details/119707079
二、遍历的实现
/* Maximum number of stack entries captured for one task. */
#define BKTRACE_DEPTH 30
/* Task that follows p on the `tasks` list, obtained via list_entry_rcu(). */
#define next_task(p) list_entry_rcu((p)->tasks.next, struct task_struct, tasks)
/*
 * Task that follows p on the `thread_group` list, obtained via list_entry_rcu().
 * NOTE(review): kernel headers may already provide next_task()/next_thread();
 * confirm these local definitions do not conflict on the target kernel version.
 */
#define next_thread(p) list_entry_rcu((p)->thread_group.next, struct task_struct, thread_group)
/*
 * Print the name (comm) and the saved kernel stack of a single task.
 *
 * @tsk: task whose stack is captured; at most BKTRACE_DEPTH entries
 *       are recorded.
 *
 * NOTE(review): save_stack_trace_tsk()/print_stack_trace() are kept from
 * the original code; confirm they exist on the target kernel version.
 */
static void print_one_task_stack(struct task_struct *tsk)
{
	unsigned long backtrace[BKTRACE_DEPTH];
	struct stack_trace trace;

	printk("task name:%s\n", tsk->comm);

	memset(backtrace, 0x00, sizeof(backtrace));
	memset(&trace, 0x00, sizeof(trace));
	trace.max_entries = BKTRACE_DEPTH;
	trace.entries = backtrace;

	save_stack_trace_tsk(tsk, &trace);
	print_stack_trace(&trace, 0);
}

/*
 * Walk every process on the `tasks` list, starting from init_task, and for
 * each process walk every thread on its `thread_group` list. The stack of
 * each thread is printed when its state is TASK_RUNNING or has the
 * TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE bit set.
 *
 * The whole walk runs inside an RCU read-side critical section.
 */
static void print_tasks_stack(void)
{
	struct task_struct *p = &init_task;
	struct task_struct *pt;

	printk("dump stack....\n");
	rcu_read_lock();
	/*
	 * do/while so that the final process on the list is also visited.
	 * Testing next_task(p) against init_task before processing p, as the
	 * earlier loop did, ends the walk one process too early.
	 */
	do {
		pt = p;
		do {
			/* TASK_RUNNING needs an equality test; the other two are bit flags. */
			if (pt->state == TASK_RUNNING ||
			    (pt->state & (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)))
				print_one_task_stack(pt);
		} while ((pt = next_thread(pt)) != p);
	} while ((p = next_task(p)) != &init_task);
	rcu_read_unlock();
}
遍历结果
发现一个问题:线程创建后如果没有单独设置名字,printk("task name:%s\n", pt->comm);打印出来的线程名就是所属进程的名字,不便于定位问题。
因此,在创建线程的时候最好指定线程名。设置线程名的方法如下:
#include <sys/prctl.h>
/*
 * Thread entry point for the naming example.
 *
 * Sets the calling thread's name to "thread_fun0" with prctl(PR_SET_NAME),
 * then loops forever: call fun_b(), sleep 5 seconds.
 * The argument is not used.
 */
static void *thread_fun0(void *arg)
{
	/* Name the thread first so later dumps can tell it apart. */
	prctl(PR_SET_NAME, "thread_fun0");

	for (;;) {
		fun_b();
		sleep(5);
	}

	/* Not reached: the loop above never exits. */
	return NULL;
}
三、应用
1、模拟死锁的代码
/*
 * One side of the deadlock demonstration: lock mtx0, wait 2 seconds,
 * then lock mtx1. A progress line is printed after each lock is taken.
 * Neither mutex is unlocked in this function.
 */
static void fun_a(void)
{
	/* First lock, followed by a pause before asking for the second one. */
	pthread_mutex_lock(&mtx0);
	sleep(2);
	printf("1%s\n", __func__);

	/* Second lock, requested while mtx0 is still held. */
	pthread_mutex_lock(&mtx1);
	printf("2%s\n", __func__);
}
/*
 * Other side of the deadlock demonstration: lock mtx1, wait 1 second,
 * then lock mtx0 (the opposite order from fun_a). A progress line is
 * printed after each lock is taken. Neither mutex is unlocked here.
 */
static void fun_b(void)
{
	pthread_mutex_lock(&mtx1);
	sleep(1);
	printf("1%s\n", __func__);
	pthread_mutex_lock(&mtx0);
	/* "2" marks the second lock, matching fun_a; it previously repeated "1". */
	printf("2%s\n", __func__);
	return;
}
/*
 * Thread entry point for the deadlock demo.
 *
 * Sets the calling thread's name to "thread_fun0" with prctl(PR_SET_NAME),
 * then loops forever: call fun_a(), sleep 5 seconds.
 * The argument is not used.
 */
static void *thread_fun0(void *arg)
{
	prctl(PR_SET_NAME, "thread_fun0");

	for (;;) {
		fun_a();
		sleep(5);
	}

	/* Not reached: the loop above never exits. */
	return NULL;
}
/*
 * Second thread entry point for the deadlock demo.
 *
 * Sets the calling thread's name to "thread_fun1" with prctl(PR_SET_NAME),
 * then loops forever: call fun_b(), sleep 7 seconds.
 * The argument is not used.
 */
static void *thread_fun1(void *arg)
{
	prctl(PR_SET_NAME, "thread_fun1");

	for (;;) {
		fun_b();
		sleep(7);
	}

	/* Not reached: the loop above never exits. */
	return NULL;
}
2、打印堆栈
3、因此,在设备因看门狗超时(俗称“狗咬死”)而复位的前一刻,需要把所有task的堆栈都打印出来,方便后续的问题定位。