内核hungTask日志引发的思考，为什么进程变成了TASK_UNINTERRUPTIBLE

序冢--磊

已于 2024-02-15 10:04:30 修改

阅读量404

点赞数 5

文章标签： linux 运维服务器

于 2024-02-02 12:20:24 首次发布

本文链接：https://blog.csdn.net/qq_32783703/article/details/135972057

版权

一、背景

很久之前同事找我问我一个问题，问我linux kernel 里这个日志是做什么的

task PartsCleaning: 7680 blocked for more than 5 seconds.

这条日志产生的位置在

/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
		if (READ_ONCE(t->__state) == TASK_UNINTERRUPTIBLE)
			check_hung_task(t, timeout);

日志发生在 check_hung_task 里面。这是内核里的 hung task 检测机制，类似看门狗：它会定时检查处于 TASK_UNINTERRUPTIBLE 状态的 task 的最后切换时间，如果超过设定的超时时间仍然没有发生过切换，就打印上面这条日志。内核里 task 一旦被设置为 TASK_UNINTERRUPTIBLE，在离开这个状态之前就不会处理 linux 的任何信号，哪怕是

kill -9 pid

内核依然不会杀死进程。

二、探索TASK_UNINTERRUPTIBLE的根源

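我们知道 TASK_UNINTERRUPTIBLE 是 linux 内核中进程的一种睡眠状态，通常配合等待队列使用。等待队列的典型用法如下（示例里设置的是可以被信号唤醒的 TASK_INTERRUPTIBLE；如果换成 TASK_UNINTERRUPTIBLE，进程就只能被显式唤醒，信号无法打断它）：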

wait_queue_head_t rwq; // wait queue head for readers; global variable

init_waitqueue_head(&rwq); // initialise the head declared above, in the driver's entry function

/*
 * Illustrative driver read routine (pseudo-code).
 *
 * Puts the current process on the rwq wait queue, marks it
 * TASK_INTERRUPTIBLE and calls schedule(). After waking up it removes
 * itself from the queue and then either returns -ERESTARTSYS when a signal
 * is pending, or copies the data to user space.
 */
uart_read()

{

        wait_queue_t wait; // allocate a wait queue entry

        init_waitqueue_entry(&wait, current); // bind the current process to the entry

        add_wait_queue(&rwq, &wait); // add the entry to the rwq wait queue head

        set_current_state(TASK_INTERRUPTIBLE);// set the state of the current process

        schedule(); // actually go to sleep (the CPU is handed to other tasks)

        set_current_state(TASK_RUNNING);

        remove_wait_queue(&rwq, &wait);

        // once woken up, work out what caused the wakeup

        if(signal_pending(current))

        {

                printk("RECV SIN!\n");

               return -ERESTARTSYS; // return to the user-space read

       }  else {  

                // woken because data became available: read the serial data

                copy_to_user(...); // report the data to user space

        }

}

既然这个操作是内核做的，那么我们在用户层怎么能把进程变成TASK_UNINTERRUPTIBLE 呢？

当进程出现 TASK_UNINTERRUPTIBLE 时候我们应该怎么办呢？

下面用一个案例带你进入TASK_UNINTERRUPTIBLE的真相

比如说一个main进程

[root@app01 demo]# ps aux|grep main
root     17418 99.1  0.0   4220   668 pts/0    D+   10:42  12:54 ./main
root     29216  0.0  0.0 112820  2332 pts/1    S+   10:55   0:00 grep --color=auto main
[root@app01 demo]#

我们发现进程已经进入了D状态，一旦进入D状态不会收到任何信号，哪怕是kill -9

进程进入D状态我们怎么排查呢？

[root@app01 demo]# cat /proc/17418/stack 
[<0>] __refrigerator+0x50/0x150
[<0>] get_signal+0x874/0x890
[<0>] do_signal+0x34/0x270
[<0>] exit_to_usermode_loop+0x9d/0x130
[<0>] prepare_exit_to_usermode+0x7f/0xb0
[<0>] retint_user+0x8/0x8
You have new mail in /var/spool/mail/root

三、定位 D 状态的原因：cgroup freezer

从调用栈可以看到，进程是停在了内核的 __refrigerator 里，而 __refrigerator 属于内核的 freezer（进程冻结）机制

然后去cgroup freezer下看程序是否被冻住

[root@app01 demo]# cat freezer.state 
FROZEN
[root@app01 demo]

发现程序被冻住了，我们给程序解冻

 echo THAWED > freezer.state

再去查看程序运行状态

116  0.0   4220   652 pts/0    R+   12:16   0:03 ./main

我们发现程序运行正常了

另一种变成 D的方法：

/** 

* File: uninterruptible.c 

* 

* $ gcc uninterruptible.c -o uninterruptible.out 

* $ ps -o ppid,pid,stat,cmd $(pgrep -f uninterruptible.out) 

* $ for ((i=0;i<100;i++)); do ./uninterruptible.out vfork-sleep & done 

* $ uptime 

* 

* Ref: https://unix.stackexchange.com/questions/134888/simulate-an-unkillable-process-in-d-state 

*/ 

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/**
 * vfork_sleep - block the calling process behind a sleeping vfork() child.
 * @secs: number of seconds handed to sleep(), first by the child and then
 *        by the parent.
 *
 * vfork() suspends the parent until the child terminates. The child sleeps
 * for @secs seconds before terminating, so during that time:
 *   - the child is in interruptible sleep,
 *   - the parent is in uninterruptible sleep.
 * Once the child is gone the parent resumes, runs the same sleep() itself
 * and finally reaps the child.
 *
 * If vfork() fails, the error is reported on stderr and the function
 * returns without sleeping.
 */
void vfork_sleep(int secs)
{
	pid_t pid = vfork();

	if (pid < 0) {
		perror("vfork");
		return;
	}

	/* Executed by the child first, then by the parent after it resumes. */
	sleep(secs);

	if (pid == 0) {
		/*
		 * A vfork() child shares the parent's address space: use _exit()
		 * so that the parent's atexit handlers are not run and its stdio
		 * buffers are not flushed from the child.
		 */
		_exit(0);
	}

	waitpid(pid, NULL, 0);
}

/**
 * usage - print the command-line help and terminate the program.
 * @argc: argument count (not used).
 * @argv: argument vector; argv[0] is printed as the program name in the
 *        example command line.
 *
 * Never returns: the process exits with status 0 after printing.
 */
void usage(int argc, char *argv[])
{
	const char *prog = argv[0];

	(void)argc;

	fputs("usage: [option].\n"
	      " vfork-sleep [sleep sec(60 by default)]: use vfork() sleep() generate D task.\n"
	      "\n",
	      stdout);
	printf(" run 'for ((i=0;i<100;i++)); do %s [opts] & done' to make plenty of D tasks.\n", prog);

	exit(0);
}

/* Sleep duration, in seconds, used when none or an unusable one is given. */
enum { DEFAULT_SLEEP_SECS = 60 };

/*
 * Parse the leading decimal number in @text as a sleep duration in seconds.
 * Returns the parsed value when it lies in 1..INT_MAX, otherwise
 * DEFAULT_SLEEP_SECS (no digits, zero, negative, or out of range).
 */
static int parse_sleep_secs(const char *text)
{
	char *end = NULL;
	long value;

	errno = 0;
	value = strtol(text, &end, 10);
	if (end == text || errno == ERANGE || value <= 0 || value > INT_MAX) {
		return DEFAULT_SLEEP_SECS;
	}
	return (int)value;
}

/**
 * main - entry point: dispatch on the sub-command given in argv[1].
 *
 * "vfork-sleep [secs]" calls vfork_sleep(); the seconds come from argv[2]
 * when exactly one extra argument is given, otherwise the default of 60 is
 * used. A missing or unknown sub-command prints the usage text.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
	if (argc < 2) {
		usage(argc, argv);
		return 0;
	}

	if (strcmp(argv[1], "vfork-sleep") == 0) {
		int secs = DEFAULT_SLEEP_SECS;

		if (argc == 3) {
			secs = parse_sleep_secs(argv[2]);
		}
		vfork_sleep(secs);
	} else {
		usage(argc, argv);
	}

	return 0;
}