Linux 进程状态：TASK_INTERRUPTIBLE 和 TASK_UNINTERRUPTIBLE

JiMoKuangXiangQu

已于 2025-03-27 16:43:32 修改

阅读量1.5k

点赞数 16

分类专栏： # 进程调度文章标签： Linux 进程状态可中断睡眠非可中断睡眠

于 2024-06-25 14:54:16 首次发布

本文链接：https://blog.csdn.net/JiMoKuangXiangQu/article/details/139952410

版权

进程调度专栏收录该内容

14 篇文章

订阅专栏

文章目录

1. 前言
2. TASK_INTERRUPTIBLE 和 TASK_UNINTERRUPTIBLE
3. 参考资料

1. 前言

限于作者能力水平，本文可能存在谬误，因此而给读者带来的损失，作者不做任何承诺。

2. TASK_INTERRUPTIBLE 和 TASK_UNINTERRUPTIBLE

2.1 语义

以下是从文章 Process Scheduling in the Kernel 摘录的对进程状态 TASK_INTERRUPTIBLE 和 TASK_UNINTERRUPTIBLE 的说明：

TASK_INTERRUPTIBLE

identifies a process that is suspended (sleeping) until some condition becomes true. 
Raising an interrupt, releasing a system resource the process is waiting for, or 
delivering a signal are examples of conditions that might wake up the process, 
that is, put its state back to TASK_RUNNING.

TASK_UNINTERRUPTIBLE

identifies a process that is suspended like in the TASK_INTERRUPTIBLE state, except that 
in this case delivering a signal will not wake up the process. This process state is 
seldom used.

简单翻译一下：

. TASK_INTERRUPTIBLE
  进程被挂起(睡眠)，直到某个条件成立。产生中断、释放进程正在等待的系统资源、或向进程投递信号，
  都可以唤醒进程，即把进程状态重新置为运行态(TASK_RUNNING)。

. TASK_UNINTERRUPTIBLE
  类似于 TASK_INTERRUPTIBLE，但无法通过信号唤醒进程。原文同时指出该状态很少被使用。

2.2 实现

在 2.1 小节了解了 TASK_INTERRUPTIBLE 和 TASK_UNINTERRUPTIBLE 的语义之后，本小节从代码层面看内核是如何实现它们的。

2.2.1 TASK_INTERRUPTIBLE 实现

以 socket 通信 TCP 三次握手过程中的 accept() 调用为例，来说明 TASK_INTERRUPTIBLE 的语义实现。

服务端调用 accept() 等待 TCP 连接的三次握手完成：

sys_accept()
	sys_accept4()
		sock->ops->accept() = inet_accept()
			sk1->sk_prot->accept() = inet_csk_accept()
				if (reqsk_queue_empty(queue)) {
					/* 阻塞模式下，永不超时，即 timeout 为 MAX_SCHEDULE_TIMEOUT */
					long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
					...
					/* 等待 TCP 连接的三次握手完成 */
					error = inet_csk_wait_for_connect(sk, timeo);
					...
				}

/*
 * Sleep on sk_sleep(sk) until the accept queue has an entry or the wait
 * has to be abandoned.
 *
 * @sk:    socket being waited on
 * @timeo: remaining wait budget, passed to and updated by schedule_timeout()
 *
 * Returns 0 when the accept queue is non-empty, -EINVAL when the socket is
 * no longer in TCP_LISTEN, sock_intr_errno(timeo) when a signal is pending,
 * and -EAGAIN when timeo has dropped to 0.
 */
static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	DEFINE_WAIT(wait);
	int err;

	for (;;) {
		/* Queue the current task on the wait queue sk_sleep(sk) */
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
				TASK_INTERRUPTIBLE);
		release_sock(sk);
		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
			timeo = schedule_timeout(timeo); /* (1) voluntary reschedule: sleep in TASK_INTERRUPTIBLE */
		...
		lock_sock(sk);
		err = 0;
		if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) /* a connection has completed the three-way handshake: */
			break; /* normal end of the wait */
		err = -EINVAL;
		if (sk->sk_state != TCP_LISTEN) /* the socket is no longer listening */
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current)) /* a signal is pending for this task: */
			break; /* stop waiting so the signal can be handled */
		err = -EAGAIN; /* error code reported when the wait budget is used up */
		if (!timeo) /* timeo has dropped to 0: */
			break; /* stop waiting; the caller gets -EAGAIN and may retry */
	}
	/* Take the task off the wait queue sk_sleep(sk) and put it back into TASK_RUNNING */
	finish_wait(sk_sleep(sk), &wait);
	return err;
}

/*
 * End a wait on @wq_head: mark the current task TASK_RUNNING and, if
 * @wq_entry is still linked, unlink it while holding wq_head->lock.
 */
void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	__set_current_state(TASK_RUNNING); /* the task goes back to TASK_RUNNING */
	/*
	 * We can check for list emptiness outside the lock
	 * IFF:
	 *  - we use the "careful" check that verifies both
	 *    the next and prev pointers, so that there cannot
	 *    be any half-pending updates in progress on other
	 *    CPU's that we haven't seen yet (and that might
	 *    still change the stack area.
	 * and
	 *  - all other users take the lock (ie we can only
	 *    have _one_ other CPU that looks at or modifies
	 *    the list).
	 */
	if (!list_empty_careful(&wq_entry->entry)) {
		spin_lock_irqsave(&wq_head->lock, flags);
		list_del_init(&wq_entry->entry);
		spin_unlock_irqrestore(&wq_head->lock, flags);
	}
}

看下调度细节：

/* kernel/time/timer.c */

signed long __sched schedule_timeout(signed long timeout)
{
	...
	schedule();
	...
}

/* kernel/sched/core.c */

/*
 * Voluntary scheduling entry point: calls __schedule(false) with preemption
 * disabled and repeats for as long as need_resched() is true.
 */
asmlinkage __visible void __sched schedule(void)
{
	struct task_struct *tsk = current;

	sched_submit_work(tsk);
	do {
		preempt_disable(); /* disable preemption */
		__schedule(false); /* voluntary (non-preempting) schedule */
		sched_preempt_enable_no_resched(); /* re-enable preemption */
	} while (need_resched());
}

static void __sched notrace __schedule(bool preempt)
{
	struct task_struct *prev, *next;
	...
	struct rq *rq;
	int cpu;

	cpu = smp_processor_id();
	rq = cpu_rq(cpu);
	prev = rq->curr;

	...
	local_irq_disable();

	...
	if (!preempt && prev->state) {
		/*
		 * 如果进程 @prev 当前有信号挂起，不进入睡眠，
		 * 而是继续保持 可运行 状态，以备后续被调度时处理信号。
		 */
		if (unlikely(signal_pending_state(prev->state, prev))) {
			prev->state = TASK_RUNNING;
		} else {
			/* 从可运行队列移除 */
			deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
			prev->on_rq = 0;
			
			...
		}
	}

	next = pick_next_task(rq, prev, &rf); /* 挑选要执行的进程 */
	...

	if (likely(prev != next)) { /* 切换到不同进程 */
		rq->nr_switches++;
		rq->curr = next;
  		...
  		
  		/* Also unlocks the rq: */
  		rq = context_switch(rq, prev, next, &rf); /* 进程上下文切换 */
	} else {
		...
	}
}

从上面分析看到，服务端在 accept() 中在 TASK_INTERRUPTIBLE 状态睡眠等待。接着看在 3 种不同场景下唤醒进程的过程。

2.2.1.1 等待的条件成立时唤醒

正常唤醒过程，在连接三次握手完成唤醒过程：

/*
 * Call trace of the normal wakeup path (unrelated lines elided): the
 * parent socket's sk_data_ready hook wakes the task sleeping in accept().
 */
tcp_child_process(sk, nsk, skb)
	...
	parent->sk_data_ready(parent) = sock_def_readable()
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			/* wake the task waiting for a connection in accept() */
			wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
							POLLRDNORM | POLLRDBAND);
		...
    ...

2.2.1.2 信号唤醒

异常唤醒过程，通过信号唤醒进程：

static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
			int group, int from_ancestor_ns)
{
	...
	complete_signal(sig, t, group);
}

static void complete_signal(int sig, struct task_struct *p, int group)
{
	...
	signal_wake_up(t, sig == SIGKILL);
	...
}

/*
 * Wake @t for a signal via signal_wake_up_state(). When @resume is true the
 * extra state passed down is TASK_WAKEKILL, otherwise it is 0.
 */
static inline void signal_wake_up(struct task_struct *t, bool resume)
{
	signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0);
}

/*
 * Flag @t as having a pending signal and try to wake it.
 *
 * @t:     target task
 * @state: extra task-state bits OR-ed with TASK_INTERRUPTIBLE to form the
 *         mask handed to wake_up_state()
 */
void signal_wake_up_state(struct task_struct *t, unsigned int state)
{
	set_tsk_thread_flag(t, TIF_SIGPENDING); /* mark task @t as having a signal pending */
	/*
	 * TASK_WAKEKILL also means wake it up in the stopped/traced/killable
	 * case. We don't check t->state here because there is a race with it
	 * executing another processor and just now entering stopped state.
	 * By using wake_up_state, we ensure the process will wake up and
	 * handle its death signal.
	 */
	/* 
	 * Wake a task sleeping in TASK_INTERRUPTIBLE so it can handle the signal.
	 * NOTE(review): wake_up_state() is not shown here; the wakeup is described as:
	 * . putting the task back into TASK_RUNNING
	 * . selecting the CPU it will run on
	 * . setting the TIF_NEED_RESCHED flag
	 * . and more ...
	 * kick_process() is called only when wake_up_state() returns 0.
	 */
	if (!wake_up_state(t, state | TASK_INTERRUPTIBLE))
		kick_process(t);
}

2.2.1.3 中断唤醒

异常唤醒过程：中断唤醒。如果被中断的进程因信号投递而被唤醒(设置了 TIF_NEED_RESCHED 标志)，将发生中断处理结束时的抢占。由于进程被中断时，可能处于内核态或用户态，所以有两种不同的执行路径。本文以 ARMv7 架构中断处理过程为例分别加以说明。

2.2.1.3.1 内核态的处理过程

/* arch/arm/kernel/entry-armv.S */

	.align 5
__irq_svc:
	svc_entry
	irq_handler /* handle an interrupt taken in kernel mode */

#ifdef CONFIG_PREEMPT
	/* case where kernel preemption is enabled */
	ldr r8, [tsk, #TI_PREEMPT]  @ get preempt count
	ldr r0, [tsk, #TI_FLAGS]  @ get flags
	teq r8, #0    @ if preempt count != 0
	movne r0, #0    @ force flags to 0
	tst r0, #_TIF_NEED_RESCHED /* in this article's scenario: test the TIF_NEED_RESCHED flag set as a result of signal delivery */
	blne svc_preempt /* interrupt handling is done: start kernel-mode preemption */
#endif

	svc_exit r5, irq = 1   @ return from exception
 UNWIND(.fnend  )
ENDPROC(__irq_svc)


#ifdef CONFIG_PREEMPT
svc_preempt:
	mov r8, lr
	/* start kernel-mode preemption; loops back to 1: while _TIF_NEED_RESCHED is still set */
1: bl preempt_schedule_irq  @ irq en/disable is done inside
	ldr r0, [tsk, #TI_FLAGS]  @ get new tasks TI_FLAGS
	tst r0, #_TIF_NEED_RESCHED
	reteq r8    @ go again
	b 1b
#endif

/*
 * Preemption entry point used on the interrupt-return path. Must be entered
 * with preempt_count() == 0 and interrupts disabled (enforced by BUG_ON).
 * Interrupts are enabled around each __schedule(true) call and disabled
 * again afterwards; the loop repeats while need_resched() is true.
 */
asmlinkage __visible void __sched preempt_schedule_irq(void)
{
	enum ctx_state prev_state;

	/* Catch callers which need to be fixed */
	BUG_ON(preempt_count() || !irqs_disabled());

	prev_state = exception_enter();

	do {
		preempt_disable();
		local_irq_enable();
		__schedule(true); /* preemptive schedule */
		local_irq_disable();
		sched_preempt_enable_no_resched();
	} while (need_resched());

	exception_exit(prev_state);
}

2.2.1.3.2 用户态的处理过程

/* arch/arm/kernel/entry-armv.S */

	.align 5
__irq_usr:
	usr_entry
	kuser_cmpxchg_check
	irq_handler /* handle an interrupt taken in user mode */
	get_thread_info tsk
	mov why, #0
	b ret_to_user_from_irq /* continue on the return-to-user path */
 UNWIND(.fnend  )
ENDPROC(__irq_usr)

/* arch/arm/include/asm/thread_info.h */

/*
 * Change these and you break ASM code in entry-common.S
 */
#define _TIF_WORK_MASK  (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
			_TIF_NOTIFY_RESUME | _TIF_UPROBE)

/* arch/arm/kernel/entry-common.S */

ENTRY(ret_to_user_from_irq)
	...
	ldr r1, [tsk, #TI_FLAGS]
	tst r1, #_TIF_WORK_MASK /* 检查 是否需要调度、是否有信号要处理 等等 */
	bne slow_work_pending /* 处理 调度、信号 等等 */
no_work_pending:
	...
	restore_user_regs fast = 0, offset = 0
ENDPROC(ret_to_user_from_irq)

	/* 处理 调度、信号 等等 */
slow_work_pending:
	...
	bl do_work_pending
	...

/* arch/arm/kernel/signal.c */

asmlinkage int
do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
{
	...
	do {
		if (likely(thread_flags & _TIF_NEED_RESCHED)) {
			schedule(); /* 执行调度 */
  		} else {
  			...
  		}
  		...
  		thread_flags = current_thread_info()->flags;
	} while (thread_flags & _TIF_WORK_MASK);
	...
}

2.2.2 TASK_UNINTERRUPTIBLE 实现

TASK_UNINTERRUPTIBLE 状态的典型场景是 msleep() 调用：

/*
 * Sleep for @msecs milliseconds. The budget is msecs converted to jiffies
 * plus one; schedule_timeout_uninterruptible() is called repeatedly until
 * it reports no remaining time.
 */
void msleep(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	/* go back to sleep with whatever time is still left */
	while (timeout)
		timeout = schedule_timeout_uninterruptible(timeout);
}

/*
 * Put the current task into TASK_UNINTERRUPTIBLE, then call
 * schedule_timeout(@timeout) and return its result.
 */
signed long __sched schedule_timeout_uninterruptible(signed long timeout)
{
	__set_current_state(TASK_UNINTERRUPTIBLE);
	return schedule_timeout(timeout);
}

msleep() 将进程设置为 TASK_UNINTERRUPTIBLE 状态，不会让人觉得意外。毕竟，msleep() 的本意就是让进程睡够指定时间才被唤醒，睡眠过程不可被中断(即 UNINTERRUPTIBLE)。如果时间没睡够中途就被唤醒，这不符合 msleep() 的语义。

再看一个驱动代码片段示例：

/* drivers/hwmon/abituguru.c */

static int abituguru_send_address(struct abituguru_data *data,
	u8 bank_addr, u8 sensor_addr, int retries)
{
	...
	for (;;) {
		...
		if (abituguru_wait(data, ABIT_UGURU_STATUS_INPUT)) {
			if (retries) {
				/* 进入 TASK_UNINTERRUPTIBLE 等待超时时间 ABIT_UGURU_RETRY_DELAY 到达 */
				set_current_state(TASK_UNINTERRUPTIBLE);
				schedule_timeout(ABIT_UGURU_RETRY_DELAY);
				retries--;
				continue;
			}
			...
		}
		...
	}
}

/* kernel/time/timer.c */

signed long __sched schedule_timeout(signed long timeout)
{
	struct timer_list timer;
	unsigned long expire;
 
	switch (timeout)
	{
	case MAX_SCHEDULE_TIMEOUT:
		schedule();
		goto out;
	default:
		...
	}

	expire = timeout + jiffies;

	setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
	__mod_timer(&timer, expire, false);
	schedule(); /* 主动调度出去，等待超时时间、或等待的事件 到达 */
	...

	timeout = expire - jiffies; /* 剩余的超时时间: 有可能 等待的事件到达 而被 提前唤醒 */

out:
	return timeout < 0 ? 0 : timeout;
}

/*
 * Timer callback: wake the task once the timeout has expired.
 * @__data carries the task_struct pointer cast to unsigned long.
 */
static void process_timeout(unsigned long __data)
{
	wake_up_process((struct task_struct *)__data);
}

/* Convenience macros for the sake of wake_up(): */
#define TASK_NORMAL   (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)

/*
 * Call try_to_wake_up() on @p with the TASK_NORMAL state mask and wake
 * flags 0, and return its result.
 */
int wake_up_process(struct task_struct *p)
{
	return try_to_wake_up(p, TASK_NORMAL, 0);
}

为什么 TASK_UNINTERRUPTIBLE 进程不会因中断或信号而唤醒？从前面的 signal_wake_up_state() 分析已经有了答案，这里再重复一下：

void signal_wake_up_state(struct task_struct *t, unsigned int state)
{
	...
	if (!wake_up_state(t, state | TASK_INTERRUPTIBLE))
		kick_process(t);
}

可见，信号只会唤醒 TASK_INTERRUPTIBLE 状态的进程。既然信号不唤醒 TASK_UNINTERRUPTIBLE 状态进程，自然也不会进一步设置进程的 TIF_NEED_RESCHED 标志；同时，除非等待的条件满足(如超时到达)，也没有其它可能对 TASK_UNINTERRUPTIBLE 状态进程设置 TIF_NEED_RESCHED 标志。综上所述，中断无法唤醒 TASK_UNINTERRUPTIBLE 状态的进程，因为中断处理结束时的抢占调度依赖于 TIF_NEED_RESCHED 标志。

在本小节驱动例子中，TASK_UNINTERRUPTIBLE 状态的进程等到超时时间 ABIT_UGURU_RETRY_DELAY 到达时被唤醒。

2.3 小结

TASK_INTERRUPTIBLE 在主动调度出去时，如果当前没有信号挂起，就会从 CPU 的运行队列中移除，但如果当前有信号挂起，会继续保持 TASK_RUNNING 状态，且不会从 CPU 的运行队列中移除；TASK_UNINTERRUPTIBLE 在主动调度出去时，直接从 CPU 的运行队列中移除。TASK_INTERRUPTIBLE 可看作浅度睡眠。

TASK_INTERRUPTIBLE 睡眠期间，可能被等待的事件、信号、中断唤醒；TASK_UNINTERRUPTIBLE 睡眠期间，无法被信号、中断唤醒，只能被等待的事件唤醒，如前面例子中的超时时间到达。TASK_UNINTERRUPTIBLE 可看作深度睡眠。

使用 ps、top 等工具观察时，TASK_INTERRUPTIBLE 状态的进程显示为 S 态，TASK_UNINTERRUPTIBLE 状态的进程显示为 D 态。

Linux 内核提供对长时间处于 TASK_UNINTERRUPTIBLE 态进程的监测机制，细节可参考博文 Linux: hung task 检测机制简析。