原文:linux内核启动流程
本文以Linux3.14版本源码为例分析其启动流程。各版本启动代码略有不同,但核心流程与思想万变不离其宗。
内核映像被加载到内存并获得控制权之后,内核启动流程开始。通常,内核映像以压缩形式存储,并不是一个可以执行的内核。因此,内核阶段的首要工作是自解压内核映像。
内核编译生成vmlinux后,通常会对其进行压缩,得到zImage(小内核,小于512KB)或bzImage(大内核,大于512KB)。在它们的头部嵌有解压缩程序。
通过linux/arch/arm/boot/compressed目录下的Makefile寻找到vmlinux文件的链接脚本(vmlinux.lds),从中查找系统启动入口函数。
$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \
		$(addprefix $(obj)/, $(OBJS)) $(lib1funcs) $(ashldi3) \
		$(bswapsdi2) FORCE
	@$(check_for_multiple_zreladdr)
	$(call if_changed,ld)
	@$(check_for_bad_syms)
vmlinux.lds(linux/arch/arm/kernel/vmlinux.lds)链接脚本开头内容
OUTPUT_ARCH(arm)
ENTRY(stext)
jiffies = jiffies_64;
SECTIONS
{
	。
	。
	。
得到内核入口函数为 stext(linux/arch/arm/kernel/head.S)
内核引导阶段
-
ENTRY(stext)
-
。
-
。
-
。
-
bl __lookup_processor_type @ r5=procinfo r9=cpuid
//处理器是否支持
-
movs r10, r5 @ invalid processor (r5=0)?
-
THUMB( it eq ) @ force fixup-able long branch encoding
-
beq __error_p @ yes, error 'p' //不支持则打印错误信息
-
-
。
-
。
-
。
-
bl __create_page_tables //创建页表
-
-
/*
-
* The following calls CPU specific code in a position independent
-
* manner. See arch/arm/mm/proc-*.S for details. r10 = base of
-
* xxx_proc_info structure selected by __lookup_processor_type
-
* above. On return, the CPU will be ready for the MMU to be
-
* turned on, and r0 will hold the CPU control register value.
-
*/
-
ldr r13, =__mmap_switched @ address to jump to after
//保存MMU使能后跳转地址
-
@ mmu has been enabled
-
adr lr, BSYM(1f) @ return (PIC) address
-
mov r8, r4 @ set TTBR1 to swapper_pg_dir
-
ARM( add pc, r10, #PROCINFO_INITFUNC )
-
THUMB( add r12, r10, #PROCINFO_INITFUNC )
-
THUMB( mov pc, r12 )
-
1: b __enable_mmu //使能MMU后跳转到__mmap_switched
-
查找标签__mmap_switched所在位置:/linux/arch/arm/kernel/head-common.S
-
__mmap_switched:
-
/*
-
* The following fragment of code is executed with the MMU on in MMU mode,
-
* and uses absolute addresses; this is not position independent.
-
*
-
* r0 = cp#15 control register
-
* r1 = machine ID
-
* r2 = atags/dtb pointer
-
* r9 = processor ID
-
*/
-
//保存设备信息、设备树及启动参数存储地址
-
。
-
。
-
。
-
b start_kernel
内核初始化阶段
从start_kernel函数开始,内核进入C语言部分,完成内核的大部分初始化工作。
函数所在位置:/linux/init/main.c
start_kernel涉及大量初始化工作,这里只列举其中重要的部分。
-
asmlinkage void __init start_kernel(void)
-
{
-
……
//类型判断
-
smp_setup_processor_id();
//smp相关,返回启动CPU号
-
……
-
local_irq_disable();
//关闭当前CPU中断
-
early_boot_irqs_disabled =
true;
-
/*
-
* Interrupts are still disabled. Do necessary setups, then
-
* enable them
-
*/
-
boot_cpu_init();
-
page_address_init();
//初始化页地址
-
pr_notice(
"%s", linux_banner);
//显示内核版本信息
-
setup_arch(&command_line);
-
mm_init_owner(&init_mm, &init_task);
-
mm_init_cpumask(&init_mm);
-
setup_command_line(command_line);
-
setup_nr_cpu_ids();
-
setup_per_cpu_areas();
-
smp_prepare_boot_cpu();
/* arch-specific boot-cpu hooks */
-
-
build_all_zonelists(
NULL,
NULL);
-
page_alloc_init();
//页内存申请初始化
-
-
pr_notice(
"Kernel command line: %s\n", boot_command_line);
//打印内核启动命令行参数
-
parse_early_param();
-
parse_args(
"Booting kernel", static_command_line, __start___param,
-
__stop___param - __start___param,
-
-1,
-1, &unknown_bootoption);
-
-
……
-
/*
-
* Set up the scheduler prior starting any interrupts (such as the
-
* timer interrupt). Full topology setup happens at smp_init()
-
* time - but meanwhile we still have a functioning scheduler.
-
*/
-
sched_init();
//进程调度器初始化
-
/*
-
* Disable preemption - early bootup scheduling is extremely
-
* fragile until we cpu_idle() for the first time.
-
*/
-
preempt_disable();
//禁止内核抢占
-
if (WARN(!irqs_disabled(),
"Interrupts were enabled *very* early, fixing it\n"))
-
local_irq_disable();
//检查关闭CPU中断
-
-
-
/*大量初始化内容 见名知意*/
-
idr_init_cache();
-
rcu_init();
-
tick_nohz_init();
-
context_tracking_init();
-
radix_tree_init();
-
/* init some links before init_ISA_irqs() */
-
early_irq_init();
-
init_IRQ();
-
tick_init();
-
init_timers();
-
hrtimers_init();
-
softirq_init();
-
timekeeping_init();
-
time_init();
-
sched_clock_postinit();
-
perf_event_init();
-
profile_init();
-
call_function_init();
-
WARN(!irqs_disabled(),
"Interrupts were enabled early\n");
-
early_boot_irqs_disabled =
false;
-
local_irq_enable();
//本地中断可以使用了
-
-
kmem_cache_init_late();
-
-
/*
-
* HACK ALERT! This is early. We're enabling the console before
-
* we've done PCI setups etc, and console_init() must be aware of
-
* this. But we do want output early, in case something goes wrong.
-
*/
-
console_init();
//初始化控制台,可以使用printk了
-
if (panic_later)
-
panic(
"Too many boot %s vars at `%s'", panic_later,
-
panic_param);
-
-
lockdep_info();
-
-
/*
-
* Need to run this when irqs are enabled, because it wants
-
* to self-test [hard/soft]-irqs on/off lock inversion bugs
-
* too:
-
*/
-
locking_selftest();
-
-
#ifdef CONFIG_BLK_DEV_INITRD
-
if (initrd_start && !initrd_below_start_ok &&
-
page_to_pfn(virt_to_page((
void *)initrd_start)) < min_low_pfn) {
-
pr_crit(
"initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
-
page_to_pfn(virt_to_page((
void *)initrd_start)),
-
min_low_pfn);
-
initrd_start =
0;
-
}
-
#endif
-
page_cgroup_init();
-
debug_objects_mem_init();
-
kmemleak_init();
-
setup_per_cpu_pageset();
-
numa_policy_init();
-
if (late_time_init)
-
late_time_init();
-
sched_clock_init();
-
calibrate_delay();
-
pidmap_init();
-
anon_vma_init();
-
acpi_early_init();
-
#ifdef CONFIG_X86
-
if (efi_enabled(EFI_RUNTIME_SERVICES))
-
efi_enter_virtual_mode();
-
#endif
-
#ifdef CONFIG_X86_ESPFIX64
-
/* Should be run before the first non-init thread is created */
-
init_espfix_bsp();
-
#endif
-
thread_info_cache_init();
-
cred_init();
-
fork_init(totalram_pages);
//初始化fork
-
proc_caches_init();
-
buffer_init();
-
key_init();
-
security_init();
-
dbg_late_init();
-
vfs_caches_init(totalram_pages);
//虚拟文件系统初始化
-
signals_init();
-
/* rootfs populating might need page-writeback */
-
page_writeback_init();
-
#ifdef CONFIG_PROC_FS
-
proc_root_init();
-
#endif
-
cgroup_init();
-
cpuset_init();
-
taskstats_init_early();
-
delayacct_init();
-
-
check_bugs();
-
-
sfi_init_late();
-
-
if (efi_enabled(EFI_RUNTIME_SERVICES)) {
-
efi_late_init();
-
efi_free_boot_services();
-
}
-
-
ftrace_init();
-
-
/* Do the rest non-__init'ed, we're now alive */
-
rest_init();
-
}
函数最后调用rest_init()函数
-
/*最重要使命:创建kernel_init进程,并进行后续初始化*/
-
static noinline void __init_refok rest_init(void)
-
{
-
int pid;
-
-
rcu_scheduler_starting();
-
/*
-
* We need to spawn init first so that it obtains pid 1, however
-
* the init task will end up wanting to create kthreads, which, if
-
* we schedule it before we create kthreadd, will OOPS.
-
*/
-
-
kernel_thread(kernel_init,
NULL,
CLONE_FS |
CLONE_SIGHAND);
//创建kernel_init进程
-
-
numa_default_policy();
-
pid = kernel_thread(kthreadd,
NULL,
CLONE_FS |
CLONE_FILES);
-
rcu_read_lock();
-
kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
-
rcu_read_unlock();
-
complete(&kthreadd_done);
-
-
/*
-
* The boot idle thread must execute schedule()
-
* at least once to get things moving:
-
*/
-
init_idle_bootup_task(current);
-
schedule_preempt_disabled();
-
/* Call into cpu_idle with preempt disabled */
-
//cpu_idle就是在系统闲置时用来降低电力的使用和减少热的产生的空转函数,函数至此不再返回,其余工作从kernel_init进程处发起
-
cpu_startup_entry(CPUHP_ONLINE);
-
}
kernel_init函数将完成设备驱动程序的初始化,并启动第一个用户空间进程_init。
部分书籍介绍的内核启动流程基于经典的2.6版本,其中kernel_init函数还会调用init_post函数专门负责_init进程的启动;在本文分析的3.14版本中,这部分工作已经被整合进kernel_init函数本身。
-
static int __ref kernel_init(void *unused)
-
{
-
int ret;
-
-
kernel_init_freeable();
//该函数中完成smp开启 驱动初始化 共享内存初始化等工作
-
/* need to finish all async __init code before freeing the memory */
-
async_synchronize_full();
-
free_initmem();
//初始化尾声,清除内存无用数据
-
mark_rodata_ro();
-
system_state = SYSTEM_RUNNING;
-
numa_default_policy();
-
-
flush_delayed_fput();
-
-
if (ramdisk_execute_command) {
-
ret = run_init_process(ramdisk_execute_command);
-
if (!ret)
-
return
0;
-
pr_err(
"Failed to execute %s (error %d)\n",
-
ramdisk_execute_command, ret);
-
}
-
-
/*
-
* We try each of these until one succeeds.
-
*
-
* The Bourne shell can be used instead of init if we are
-
* trying to recover a really broken machine.
-
*寻找init函数,创建一号进程_init (第一个用户空间进程)*/
-
if (execute_command) {
-
ret = run_init_process(execute_command);
-
if (!ret)
-
return
0;
-
pr_err(
"Failed to execute %s (error %d). Attempting defaults...\n",
-
execute_command, ret);
-
}
-
if (!try_to_run_init_process(
"/sbin/init") ||
-
!try_to_run_init_process(
"/etc/init") ||
-
!try_to_run_init_process(
"/bin/init") ||
-
!try_to_run_init_process(
"/bin/sh"))
-
return
0;
-
-
panic(
"No working init found. Try passing init= option to kernel. "
-
"See Linux Documentation/init.txt for guidance.");
-
}
-
static int __ref kernel_init(void *unused)
-
{
-
int ret;
-
-
kernel_init_freeable();
//该函数中完成smp开启 驱动初始化 共享内存初始化等工作
-
/* need to finish all async __init code before freeing the memory */
-
async_synchronize_full();
-
free_initmem();
//初始化尾声,清除内存无用数据
-
mark_rodata_ro();
-
system_state = SYSTEM_RUNNING;
-
numa_default_policy();
-
-
flush_delayed_fput();
-
-
if (ramdisk_execute_command) {
-
ret = run_init_process(ramdisk_execute_command);
-
if (!ret)
-
return
0;
-
pr_err(
"Failed to execute %s (error %d)\n",
-
ramdisk_execute_command, ret);
-
}
-
-
/*
-
* We try each of these until one succeeds.
-
*
-
* The Bourne shell can be used instead of init if we are
-
* trying to recover a really broken machine.
-
*寻找init函数,创建一号进程_init (第一个用户空间进程)*/
-
if (execute_command) {
-
ret = run_init_process(execute_command);
-
if (!ret)
-
return
0;
-
pr_err(
"Failed to execute %s (error %d). Attempting defaults...\n",
-
execute_command, ret);
-
}
-
if (!try_to_run_init_process(
"/sbin/init") ||
-
!try_to_run_init_process(
"/etc/init") ||
-
!try_to_run_init_process(
"/bin/init") ||
-
!try_to_run_init_process(
"/bin/sh"))
-
return
0;
-
-
panic(
"No working init found. Try passing init= option to kernel. "
-
"See Linux Documentation/init.txt for guidance.");
-
}
到此,内核初始化已经接近尾声,所有的初始化函数都已经调用,因此free_initmem函数可以舍弃内存的__init_begin至__init_end之间的数据。
当内核被引导并进行初始化后,内核启动了自己的第一个用户空间应用程序_init,这是调用的第一个使用标准C库编译的程序,其进程编号始终为1。
_init负责触发其他必需的进程,以使系统进入整体可用的状态。
以下为内核启动流程图:
![在这里插入图片描述](https://img-blog.csdn.net/20170315225401860?watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQvY2MyNDM0OTQ5MjY=/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70/gravity/Center)