linux rest_init

在linux初始化时,start_kernel最后会调用rest_init进行最后的初始化工作。Linux下有3个特殊的进程:idle进程(PID = 0)、init进程(PID = 1)和kthreadd(PID = 2)。rest_init函数的作用是创建init和kthreadd这两个进程,而idle进程继续往下执行,最终在一个while循环中作为空闲进程运行:如果没有其他进程需要调度,当前cpu就进行轮询或者休眠。

/*
 * rest_init - spawn the kernel_init and kthreadd threads, then turn the
 * calling context into the idle task.
 *
 * Sequence, as visible in this body:
 *   1. start a thread running kernel_init;
 *   2. start a thread running kthreadd and record its task in kthreadd_task;
 *   3. complete kthreadd_done;
 *   4. call schedule_preempt_disabled() once, then enter
 *      cpu_startup_entry(CPUHP_ONLINE).
 */
static noinline void __init_refok rest_init(void)
{
	int pid;
	rcu_scheduler_starting();
	/*
	 * We need to spawn init first so that it obtains pid 1, however
	 * the init task will end up wanting to create kthreads, which, if
	 * we schedule it before we create kthreadd, will OOPS.
	 */
	kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);// create the init process (PID 1)
	numa_default_policy();
	pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);// create the kthreadd kernel thread, which manages kernel-level threads
	/* Look up the task for the pid just returned, inside an RCU read-side section. */
	rcu_read_lock();
	kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
	rcu_read_unlock();
	complete(&kthreadd_done);// completion: tells the kernel_init thread it may continue its initialization
	/*
	 * The boot idle thread must execute schedule()
	 * at least once to get things moving:
	 */
	init_idle_bootup_task(current);// set the idle task's scheduling class to idle_sched_class
	schedule_preempt_disabled();// enable preemption and schedule; once scheduled back in, disable preemption again and continue
	/* Call into cpu_idle with preempt disabled */
	cpu_startup_entry(CPUHP_ONLINE);// the code the idle task ultimately runs
}

cpu_startup_entry完成idle进程的最后工作:

/*
 * cpu_startup_entry - final setup for the idle task before it enters
 * cpu_idle_loop().
 *
 * @state: cpuhp state passed by the caller; it is not referenced in the
 *         body shown here.
 *
 * Per the comment below, this function is not expected to return.
 */
void cpu_startup_entry(enum cpuhp_state state)
{
	/*
	 * This #ifdef needs to die, but it's too late in the cycle to
	 * make this generic (arm and sh have never invoked the canary
	 * init for the non boot cpus!). Will be fixed in 3.11
	 */
#ifdef CONFIG_X86
	/*
	 * If we're the non-boot CPU, nothing set the stack canary up
	 * for us. The boot CPU already has it initialized but no harm
	 * in doing it again. This is a good place for updating it, as
	 * we wont ever return from this function (so the invalid
	 * canaries already on the stack wont ever trigger).
	 */
	boot_init_stack_canary();
#endif
	current_set_polling();// mark the idle thread's state as polling
	arch_cpu_idle_prepare();
	/* NOTE(review): this printk looks like a debug trace added for the article - confirm before reuse. */
	printk(KERN_INFO "  cpu_startup_entry.\n");
	cpu_idle_loop();
}

核心函数为cpu_idle_loop,最终操作系统在没有其他任务调度的时候,idle线程就在cpu_idle_loop这个循环中执行。

/*
 * cpu_idle_loop - body of the idle task; never returns.
 *
 * Outer loop: enter nohz idle, run the inner loop until need_resched()
 * becomes true, leave nohz idle, then call schedule_preempt_disabled().
 *
 * Inner loop (per iteration): disable local interrupts, then either
 * spin in cpu_idle_poll() or clear the polling flag and call
 * arch_cpu_idle().
 */
static void cpu_idle_loop(void)
{
	while (1) {
		tick_nohz_idle_enter();

		while (!need_resched()) {// while no reschedule is needed, the idle task stays in this loop
			check_pgt_cache();
			rmb();

			if (cpu_is_offline(smp_processor_id()))
				arch_cpu_idle_dead();

			local_irq_disable();
			arch_cpu_idle_enter();

			/*
			 * In poll mode we reenable interrupts and spin.
			 *
			 * Also if we detected in the wakeup from idle
			 * path that the tick broadcast device expired
			 * for us, we don't want to go deep idle as we
			 * know that the IPI is going to arrive right
			 * away
			 */
			if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
				cpu_idle_poll(); // poll if cpu_idle_force_poll is set or the tick broadcast has expired
			} else {
				current_clr_polling();
				/* Re-check after clearing the polling flag, before going idle. */
				if (!need_resched()) {
					stop_critical_timings();
					rcu_idle_enter();
					arch_cpu_idle();
					/* arch_cpu_idle() is expected to return with interrupts enabled. */
					WARN_ON_ONCE(irqs_disabled());
					rcu_idle_exit();
					start_critical_timings();
				} else {
					/* Not going idle: undo the local_irq_disable() above. */
					local_irq_enable();
				}
				current_set_polling();
			}
			arch_cpu_idle_exit();
		}
		tick_nohz_idle_exit();
		schedule_preempt_disabled();// a reschedule is needed, so call into schedule()
	}
}

cpu_idle_loop分为两种情况:

(1)如果idle进程设置了TIF_NEED_RESCHED标志,则调用schedule_preempt_disabled进行调度

(2)如果当前进程不需要被调度,即调度idle进程运行以后,还是没有其他进程需要调度,当前cpu则进行poll或者休眠。又可以分为两种情况,即当前体系架构是实现了poll还是休眠,对于这两种情况,关键代码是这块:

/* Excerpt from the inner loop of cpu_idle_loop(): poll vs. arch idle. */
if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
		/* Forced polling or expired tick broadcast: spin in cpu_idle_poll(). */
		cpu_idle_poll();
} else {
		current_clr_polling();
		/* Re-check after clearing the polling flag, before going idle. */
		if (!need_resched()) {
			stop_critical_timings();
			rcu_idle_enter();
			arch_cpu_idle();
			/* arch_cpu_idle() is expected to return with interrupts enabled. */
			WARN_ON_ONCE(irqs_disabled());
			rcu_idle_exit();
			start_critical_timings();
		} else {
			/* Not going idle: re-enable the interrupts disabled earlier in the loop. */
			local_irq_enable();
		}
		current_set_polling();
}

(2.1)可以看到第一种情况,当设置了cpu_idle_force_poll 或者broadcast超时,则调用cpu_idle_poll,当相关体系结构没有实现arch_cpu_idle时,arch_cpu_idle函数为:

/*
 * Weak default arch_cpu_idle(), used when the architecture does not
 * provide its own: it sets cpu_idle_force_poll to 1 and re-enables
 * local interrupts, so later iterations of cpu_idle_loop() take the
 * cpu_idle_poll() branch.
 */
void __weak arch_cpu_idle(void)
{
	cpu_idle_force_poll = 1;
	local_irq_enable();
}

也就是说,即使走else分支,arch_cpu_idle也总会把cpu_idle_force_poll设置为1,下次while循环就会走if分支,所以在这种体系架构下,idle进程总是会执行cpu_idle_poll:

/*
 * cpu_idle_poll - busy-wait idle path.
 *
 * Enables local interrupts and spins on cpu_relax() until
 * need_resched() becomes true. The spin is bracketed by
 * rcu_idle_enter()/rcu_idle_exit() and by cpu_idle trace events.
 *
 * Return: always 1.
 */
static inline int cpu_idle_poll(void)
{
	rcu_idle_enter();
	trace_cpu_idle_rcuidle(0, smp_processor_id());
	local_irq_enable();
	/* Spin until a reschedule is requested. */
	while (!need_resched())
		cpu_relax();
	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
	rcu_idle_exit();
	return 1;
}
/*
 * cpu_relax() expands to smp_mb() when __LINUX_ARM_ARCH__ is 6 or
 * CONFIG_ARM_ERRATA_754327 is defined, and to barrier() otherwise.
 */
#if __LINUX_ARM_ARCH__ == 6 || defined(CONFIG_ARM_ERRATA_754327)
#define cpu_relax()			smp_mb()
#else
#define cpu_relax()			barrier()
#endif

可以看到,在中断到来以后,如果没有设置idle进程需要调度的标志,cpu_idle_poll会一直在while循环里调用cpu_relax,该操作是个屏障操作(按上面的宏定义展开为smp_mb()或barrier())。

(2.2)如果对应的体系架构下面重新实现了arch_cpu_idle:

/*
 * Architecture-specific arch_cpu_idle(): calls cpuidle_idle_call() and,
 * when that returns non-zero, falls back to default_idle().
 */
void arch_cpu_idle(void)
{
	if (cpuidle_idle_call())
		default_idle();
}
/*
 * default_idle - idle via the arm_pm_idle hook when it is set, otherwise
 * via cpu_do_idle(); local interrupts are re-enabled before returning.
 */
static void default_idle(void)
{
	if (arm_pm_idle)// our architecture does not set this hook
		arm_pm_idle();
	else
		cpu_do_idle();
	local_irq_enable();
}

核心函数是cpu_do_idle,我们的体系架构下是

@ cpu_arm920_do_idle: issue the CP15 "wait for interrupt" operation,
@ then return to the caller.
ENTRY(cpu_arm920_do_idle)
	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
	mov	pc, lr				@ return: copy the link register into pc

该函数的作用是通过cp15协处理器,使cpu进入低功耗状态,停止运行并等待中断出现,即at91rm9200进入省电模式。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值