linux内核hungtask机制学习

wwwwwnp

已于 2024-08-02 17:05:49 修改

阅读量389

点赞数 7

文章标签：学习 linux

于 2024-07-29 11:59:59 首次发布

本文链接：https://blog.csdn.net/wwwwwnp/article/details/140767749

版权

引用图片：https://mp.weixin.qq.com/s/7zMleKHYbJoOiEFZiakbiw

/*
 * Set up the hung task detector.
 *
 * Hooks panic_block into the panic notifier chain, registers a power
 * management notifier and starts the "khungtaskd" kernel thread that runs
 * watchdog(). Always returns 0; the kthread_run() result is stored in
 * watchdog_task but is not checked for failure here.
 */
static int __init hung_task_init(void)
{
	/*
	 * Register panic_block on panic_notifier_list so that its callback,
	 * hung_task_panic(), is invoked when the system panics.
	 */
	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);

	/*
	 * Disable hung task detector on suspend.
	 * NOTE(review): hungtask_pm_notify is not shown here; presumably it
	 * toggles hung_detector_suspended, which watchdog() tests - confirm.
	 */
	pm_notifier(hungtask_pm_notify, 0);

	/* Create and start the detector thread, named "khungtaskd". */
	watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");

	return 0;
}
/* Run hung_task_init() as a subsys-level initcall. */
subsys_initcall(hung_task_init);

/*
 * Notifier block that hung_task_init() registers on panic_notifier_list.
 * Its callback, hung_task_panic(), runs when a panic is signalled.
 */
static struct notifier_block panic_block = {
	.notifier_call = hung_task_panic,
};

/*
 * Panic notifier callback: remember that a panic has happened.
 *
 * Sets did_panic, which check_hung_uninterruptible_tasks() tests so that
 * no further hung tasks are reported once the system has already crashed.
 * None of the notifier arguments (@this, @event, @ptr) are used.
 *
 * Returns NOTIFY_DONE.
 */
static int hung_task_panic(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	did_panic = 1;
	return NOTIFY_DONE;
}

/*
 * Thread function of the hung task detector ("khungtaskd").
 *
 * Loops forever: sleeps until the next scan is due, then runs
 * check_hung_uninterruptible_tasks() unless a reset was requested or the
 * detector is suspended. The timeout and check-interval sysctls are
 * re-read on every iteration.
 *
 * @dummy: unused.
 *
 * The loop has no exit, so the trailing return is never reached.
 */
static int watchdog(void *dummy)
{
	/* Time (in jiffies) of the most recent scan slot. */
	unsigned long last_scan = jiffies;

	/* Set this thread's nice value to 0. */
	set_user_nice(current, 0);

	while (1) {
		unsigned long timeout = sysctl_hung_task_timeout_secs;
		unsigned long period = sysctl_hung_task_check_interval_secs;
		long remaining;

		/*
		 * An interval of 0 means "use the timeout"; an interval
		 * longer than the timeout is clamped down to the timeout.
		 */
		if (!period || period > timeout)
			period = timeout;

		/*
		 * NOTE(review): hung_timeout_jiffies() is not shown here;
		 * from its use, a positive result is how long to keep
		 * sleeping and a result <= 0 means a scan is due - confirm.
		 */
		remaining = hung_timeout_jiffies(last_scan, period);
		if (remaining > 0) {
			/* Not due yet: sleep (interruptibly) for the rest. */
			schedule_timeout_interruptible(remaining);
			continue;
		}

		/*
		 * reset_hung_task is read and cleared atomically; a non-zero
		 * value skips this scan, as does hung_detector_suspended.
		 */
		if (!(atomic_xchg(&reset_hung_task, 0) ||
		      hung_detector_suspended))
			check_hung_uninterruptible_tasks(timeout);

		/* Restart the interval whether or not a scan actually ran. */
		last_scan = jiffies;
	}

	return 0;
}

/*
 * Walk all processes and their threads and pass every thread whose state
 * is exactly TASK_UNINTERRUPTIBLE (D state) to check_hung_task().
 *
 * @timeout: hang threshold in seconds, forwarded to check_hung_task().
 *
 * At most sysctl_hung_task_check_count threads are visited per call.
 * After the walk, the flags that check_hung_task() may have set are acted
 * on: hung_task_show_lock dumps all held locks, and hung_task_call_panic
 * dumps backtraces for all CPUs and then panics.
 */
static void check_hung_uninterruptible_tasks(unsigned long timeout)
{
	/* Budget: maximum number of threads examined in this scan. */
	int max_count = sysctl_hung_task_check_count;
	unsigned long last_break = jiffies;
	struct task_struct *g, *t;

	/*
	 * If the system crashed already then all bets are off,
	 * do not report extra hung tasks:
	 */
	if (test_taint(TAINT_DIE) || did_panic)
		return;

	hung_task_show_lock = false;
	rcu_read_lock();
	for_each_process_thread(g, t) {
		/* Budget used up: stop walking, go to the post-scan handling. */
		if (!max_count--)
			goto unlock;
		/*
		 * Once HUNG_TASK_LOCK_BREAK jiffies have passed since the
		 * last break, call rcu_lock_break(); a false result aborts
		 * the scan.
		 * NOTE(review): rcu_lock_break() is not shown here;
		 * presumably it briefly leaves the RCU read-side section and
		 * reports whether g and t are still usable - confirm.
		 */
		if (time_after(jiffies, last_break + HUNG_TASK_LOCK_BREAK)) {
			if (!rcu_lock_break(g, t))
				goto unlock;
			last_break = jiffies;
		}
		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
		if (t->state == TASK_UNINTERRUPTIBLE)
			check_hung_task(t, timeout);
	}
 unlock:
	rcu_read_unlock();
	/* Dump information about all held locks if a report asked for it. */
	if (hung_task_show_lock)
		debug_show_all_locks();
	if (hung_task_call_panic) {
		/* Capture backtraces of every CPU before bringing the system down. */
		trigger_all_cpu_backtrace();
		panic("hung_task: blocked tasks");
	}
}

/*
 * Decide whether one TASK_UNINTERRUPTIBLE task counts as hung and, if so,
 * report it.
 *
 * @t:       task to examine.
 * @timeout: hang threshold in seconds (scaled by HZ below).
 *
 * A task is treated as hung when its context-switch count has not changed
 * since it was last recorded and at least @timeout seconds have passed
 * since that record was taken. On a hang this function emits the
 * sched_process_hang trace event, optionally arms the panic path, and
 * optionally prints a report, depending on the sysctls.
 */
static void check_hung_task(struct task_struct *t, unsigned long timeout)
{
	/* Total context switches of this task: t->nvcsw plus t->nivcsw. */
	unsigned long switch_count = t->nvcsw + t->nivcsw;

	/*
	 * Ensure the task is not frozen.
	 * Also, skip vfork and any other user process that freezer should skip.
	 */
	if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP)))
	    return;

	/*
	 * When a freshly created task is scheduled once, changes its state to
	 * TASK_UNINTERRUPTIBLE without having ever been switched out once, it
	 * musn't be checked.
	 */
	if (unlikely(!switch_count))
		return;

	/*
	 * The task has been switched since the last look: it is making
	 * progress. Record the new count and timestamp, then start over.
	 */
	if (switch_count != t->last_switch_count) {
		t->last_switch_count = switch_count;
		t->last_switch_time = jiffies;
		return;
	}
	/* The deadline (last record + timeout) is still in the future. */
	if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
		return;

	trace_sched_process_hang(t);

	/*
	 * Panic requested via sysctl: only set the flags here. The caller,
	 * check_hung_uninterruptible_tasks(), dumps the locks and panics once
	 * its scan has finished.
	 */
	if (sysctl_hung_task_panic) {
		console_verbose();
		hung_task_show_lock = true;
		hung_task_call_panic = true;
	}

	/*
	 * Ok, the task did not get scheduled for more than 2 minutes,
	 * complain:
	 * (The actual threshold is @timeout seconds; "2 minutes" is
	 * presumably the default value.)
	 */
	/*
	 * A non-zero sysctl_hung_task_warnings enables the report. A positive
	 * value is decremented per report, so it acts as a budget of
	 * remaining reports; a negative value is never decremented.
	 */
	if (sysctl_hung_task_warnings) {
		if (sysctl_hung_task_warnings > 0)
			sysctl_hung_task_warnings--;
		pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
		       t->comm, t->pid, (jiffies - t->last_switch_time) / HZ);
		pr_err("      %s %s %.*s\n",
			print_tainted(), init_utsname()->release,
			(int)strcspn(init_utsname()->version, " "),
			init_utsname()->version);
		pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
			" disables this message.\n");
		/* Print the hung task's state and stack. */
		sched_show_task(t);
		hung_task_show_lock = true;
	}

	/*
	 * Touch the NMI watchdog.
	 * NOTE(review): presumably so the time spent printing the report
	 * above does not trip the NMI watchdog - confirm.
	 */
	touch_nmi_watchdog();
}

导致hungtask的一些场景：

1、mutex锁持锁睡眠

2、在已持有spinlock的情况下，又在中断上下文中再次获取同一把锁（自死锁）

3、f2fs、IO、调度等方面的异常都可能导致

4、wait_event等不可中断等待接口的调用（注意：wait_event_interruptible使任务睡眠于TASK_INTERRUPTIBLE状态，而hung task只检测TASK_UNINTERRUPTIBLE状态的任务）；

5、内存不足、内存分配异常、UFS器件异常、文件系统异常、spinlock或rwsem等各种锁死锁、中断风暴、task本身的异常等等

例如：主线程调用wait_event（不可中断等待），等待条件为中断处理函数将标志位置位；但中断一直未响应，导致等待条件一直不满足，主线程一直处于D状态、无法被唤醒，超过hung_task_timeout_secs（默认120s）后会触发hungtask。