1. Linux启动流程框图
ENTRY(stext) (arch/arm/kernel/head.S)
|_ safe_svcmode_maskall (arch/arm/include/asm/assembler.h)
|_ __lookup_processor_type (arch/arm/kernel/head-common.S)
|_ __vet_atags (arch/arm/kernel/head-common.S)
|_ __create_page_tables (arch/arm/kernel/head.S)
|_ __mmap_switched (arch/arm/kernel/head-common.S)
| |_ start_kernel (init/main.c)
| |_ lockdep_init();
| |_ set_task_stack_end_magic(&init_task);
| |_ ...
| |_ /* 解析使用early_param宏定义的参数 */
| |_ parse_early_param();
| |_ /* 解析使用__setup宏定义的参数,其中包含console,root */
| |_ parse_args("Booting kernel",..., &unknown_bootoption);
| |_ ...
| |_ rest_init(); (init/main.c)
| |_ kernel_init(回调函数) (init/main.c)
| | |_ kernel_init_freeable(); (init/main.c)
| | | |_ ...
| | | |_ do_basic_setup(); (init/main.c)
| | | | |_ ...
| | | | |_ driver_init(); (drivers/base/init.c)
| | | | | |_ devtmpfs_init();
| | | | | |_ devices_init();
| | | | | |_ buses_init();
| | | | | |_ classes_init();
| | | | | |_ ...
| | | | |_ ...
| | | |_ sys_open((const char __user *) "/dev/console"...
| | | |_ (void) sys_dup(0);
| | | |_ (void) sys_dup(0);
| | | |_ prepare_namespace(); (init/do_mounts.c)
| | | |_ mount_root(); (init/do_mounts.c)
| | | | |_ create_dev("/dev/root", ROOT_DEV);
| | | | |_ mount_block_root("/dev/root", root_mountflags);
| | | | |_ do_mount_root(name, p, flags, root_mount_data);
| | | | | |_ /* 挂载成功则打印以下语句 */
| | | | | |_ printk(KERN_INFO"VFS: Mounted root (%s...
| | | | |_ /* 挂载出错则打印以下语句 */
| | | | |_ printk("VFS: Cannot open root device ...
| | | | |_ printk("Please append a correct \"root=\"...
| | | | |_ printk_all_partitions();
| | | | |_ panic("VFS: Unable to mount root fs on %s", b);
| | | |_ devtmpfs_mount("dev");
| | | | |_ sys_mount("devtmpfs", ...
| | | | |_ /* 挂载出错则打印以下语句 */
| | | | |_ printk(KERN_INFO "devtmpfs: error mounting %i...
| | | | |_ /* 挂载成功则打印以下语句 */
| | | | |_ printk(KERN_INFO "devtmpfs: mounted\n");
| | | |_ sys_mount(".", "/", NULL, MS_MOVE, NULL);
| | | |_ sys_chroot(".");
| | |_ run_init_process(ramdisk_execute_command);
| | |_ run_init_process(execute_command);
| | |_ try_to_run_init_process("/sbin/init")...
| |_ kthreadd(回调函数)
| |_ cpu_startup_entry(CPUHP_ONLINE); (kernel/sched/idle.c)
|_ __enable_mmu (arch/arm/kernel/head.S)
2. Linux启动流程代码分析
编译一遍linux内核之后,将编译生成的内核镜像文件vmlinux删除,并加上V=1
进行make编译。以32位ARM架构CPU为例,可以找到以下语句:
arm-linux-gnueabihf-ld -EL -p --no-undefined -X --pic-veneer --build-id -o vmlinux -T ./arch/arm/kernel/vmlinux.lds arch/arm/kernel/head.o ...
所以它是以arch/arm/kernel/vmlinux.lds
文件进行链接的,看下里面的内容:
OUTPUT_ARCH(arm)
ENTRY(stext)
jiffies = jiffies_64;
SECTIONS
{
...
通过ENTRY(stext)
知道内核的入口,那就继续执行grep -rnw "stext" ./
命令找下入口在哪个文件中定义,关于arm(32位)架构的可以看到以下结果:
./arch/arm/kernel/vmlinux.lds:493:ENTRY(stext)
./arch/arm/kernel/vmlinux.lds.S:49:ENTRY(stext)
./arch/arm/kernel/head-nommu.S:44:ENTRY(stext)
./arch/arm/kernel/head-nommu.S:47:ENTRY(stext)
./arch/arm/kernel/head-nommu.S:87:ENDPROC(stext)
./arch/arm/kernel/head.S:80:ENTRY(stext)
./arch/arm/kernel/head.S:145:ENDPROC(stext)
在前面的链接命令也可以知道,紧跟着链接脚本vmlinux.lds后面的就是arch/arm/kernel/head.o
,所以内核的入口就在它这里。
ENTRY(stext)
/* file: arch/arm/kernel/head.S (the number at the start of each line below is the line number in that file) */
59 /*
60 * Kernel startup entry point.
61 * ---------------------------
62 *
63 * This is normally called from the decompressor code. The requirements
64 * are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0,
65 * r1 = machine nr, r2 = atags or dtb pointer.
66 *
67 * This code is mostly position independent, so if you link the kernel at
68 * 0xc0008000, you call this at __pa(0xc0008000).
69 *
70 * See linux/arch/arm/tools/mach-types for the complete list of machine
71 * numbers for r1.
72 *
73 * We're trying to keep crap to a minimum; DO NOT add any machine specific
74 * crap here - that's what the boot loader (or in extreme, well justified
75 * circumstances, zImage) is for.
76 */
77 .arm
78
79 __HEAD
80 ENTRY(stext)
81 ARM_BE8(setend be ) @ ensure we are in BE8 mode
82
83 THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM.
84 THUMB( bx r9 ) @ If this is a Thumb-2 kernel,
85 THUMB( .thumb ) @ switch to Thumb now.
86 THUMB(1: )
87
88 #ifdef CONFIG_ARM_VIRT_EXT
89 bl __hyp_stub_install
90 #endif
91 @ ensure svc mode and all interrupts masked
92 safe_svcmode_maskall r9
93
94 mrc p15, 0, r9, c0, c0 @ get processor id
95 bl __lookup_processor_type @ r5=procinfo r9=cpuid
96 movs r10, r5 @ invalid processor (r5=0)?
97 THUMB( it eq ) @ force fixup-able long branch encoding
98 beq __error_p @ yes, error 'p'
99
100 #ifdef CONFIG_ARM_LPAE
101 mrc p15, 0, r3, c0, c1, 4 @ read ID_MMFR0
102 and r3, r3, #0xf @ extract VMSA support
103 cmp r3, #5 @ long-descriptor translation table format?
104 THUMB( it lo ) @ force fixup-able long branch encoding
105 blo __error_lpae @ only classic page table format
106 #endif
107
108 #ifndef CONFIG_XIP_KERNEL
109 adr r3, 2f
110 ldmia r3, {r4, r8}
111 sub r4, r3, r4 @ (PHYS_OFFSET - PAGE_OFFSET)
112 add r8, r8, r4 @ PHYS_OFFSET
113 #else
114 ldr r8, =PLAT_PHYS_OFFSET @ always constant in this case
115 #endif
116
117 /*
118 * r1 = machine no, r2 = atags or dtb,
119 * r8 = phys_offset, r9 = cpuid, r10 = procinfo
120 */
121 bl __vet_atags
122 #ifdef CONFIG_SMP_ON_UP
123 bl __fixup_smp
124 #endif
125 #ifdef CONFIG_ARM_PATCH_PHYS_VIRT
126 bl __fixup_pv_table
127 #endif
128 bl __create_page_tables
129
130 /*
131 * The following calls CPU specific code in a position independent
132 * manner. See arch/arm/mm/proc-*.S for details. r10 = base of
133 * xxx_proc_info structure selected by __lookup_processor_type
134 * above. On return, the CPU will be ready for the MMU to be
135 * turned on, and r0 will hold the CPU control register value.
136 */
137 ldr r13, =__mmap_switched @ address to jump to after
138 @ mmu has been enabled
139 adr lr, BSYM(1f) @ return (PIC) address
140 mov r8, r4 @ set TTBR1 to swapper_pg_dir
141 ldr r12, [r10, #PROCINFO_INITFUNC]
142 add r12, r12, r10
143 ret r12
144 1: b __enable_mmu
145 ENDPROC(stext)
从前面59~76行的注释也可以知道,linux的启动需要以下几个条件:
- 关闭MMU;
- 关闭D-Cache,I-cache则无所谓;
- r0 = 0、r1 = 机器ID、r2 = BootLoader传递的atags或者是dtb(设备树二进制文件)的地址。
接着开始看内核入口函数stext:
- 81~86行:如果内核配置为BE8(大端字节序的一种)模式,则先通过setend be切换到大端;如果是Thumb-2内核,则从ARM指令集切换到Thumb指令集;
- 92行:接着调用safe_svcmode_maskall r9确保CPU进入了SVC模式,并且屏蔽所有中断;
- 94行:获取处理器ID保存到r9寄存器中;
- 95行:跳转到__lookup_processor_type函数检查linux内核是否支持该CPU,内部会将procinfo(processor info,处理器信息)类型结构体实例的地址保存到r5寄存器中;
- 98行:不支持(r5 = 0)则跳转到__error_p打印错误信息;
- 117~121行:跳转到__vet_atags函数检查r2(atags或dtb指针)的合法性;
- 128行:跳转到__create_page_tables函数去创建页表,该函数在arch/arm/kernel/head.S中定义;
- 137行:将函数__mmap_switched(里面会跳转到start_kernel函数)的地址存放到r13(sp)寄存器中,后面使能MMU后会跳转到它这里运行;
- 144行:跳转到__enable_mmu函数使能MMU,该函数在arch/arm/kernel/head.S中定义。
safe_svcmode_maskall
/* file: arch/arm/include/asm/assembler.h */
/*
* Helper macro to enter SVC mode cleanly and mask interrupts. reg is
* a scratch register for the macro to overwrite.
*
* This macro is intended for forcing the CPU into SVC mode at boot time.
* you cannot return to the original mode.
*
* Two variants are selected at build time: the first branch is used when
* __LINUX_ARM_ARCH__ >= 6 and CONFIG_CPU_V7M is not defined, the #else
* branch otherwise.
*/
.macro safe_svcmode_maskall reg:req
#if __LINUX_ARM_ARCH__ >= 6 && !defined(CONFIG_CPU_V7M)
/* Read the current program status register into the scratch register. */
mrs \reg , cpsr
/*
* XOR with HYP_MODE and test the mode field: the masked result is zero
* (Z set) only when the current mode equals HYP_MODE.
*/
eor \reg, \reg, #HYP_MODE
tst \reg, #MODE_MASK
/*
* Build the new status value: clear the mode field, then set the I and F
* bits together with SVC_MODE (plus the T bit for Thumb kernels). These
* instructions do not update the flags, so the result of tst is still
* available to the bne below.
*/
bic \reg , \reg , #MODE_MASK
orr \reg , \reg , #PSR_I_BIT | PSR_F_BIT | SVC_MODE
THUMB( orr \reg , \reg , #PSR_T_BIT )
/* Not in HYP mode: write the new value directly at label 1. */
bne 1f
/*
* In HYP mode: additionally set PSR_A_BIT, place the new value in SPSR,
* use the address of label 2 as the return address and leave the mode
* with __ERET. NOTE(review): __MSR_ELR_HYP(14) presumably copies r14 (lr)
* into ELR_hyp; its definition is not shown here.
*/
orr \reg, \reg, #PSR_A_BIT
adr lr, BSYM(2f)
msr spsr_cxsf, \reg
__MSR_ELR_HYP(14)
__ERET
1: msr cpsr_c, \reg
2:
#else
/*
* workaround for possibly broken pre-v6 hardware
* (akita, Sharp Zaurus C-1000, PXA270-based)
*/
setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, \reg
#endif
.endm
__lookup_processor_type
/* file: arch/arm/kernel/head-common.S */
/*
* Read processor ID register (CP#15, CR0), and look up in the linker-built
* supported processor list. Note that we can't use the absolute addresses
* for the __proc_info lists since we aren't running with the MMU on
* (and therefore, we are not in the correct address space). We have to
* calculate the offset.
*
* r9 = cpuid
* Returns:
* r3, r4, r6 corrupted
* r5 = proc_info pointer in physical address space
* r9 = cpuid (preserved)
*/
__lookup_processor_type:
/*
* r3 = run-time address of the data table, r4-r6 = the three words stored
* in it. The difference r3 - r4 is the offset that is then added to r5 and
* r6 to convert them to physical addresses.
*/
adr r3, __lookup_processor_type_data
ldmia r3, {r4 - r6}
sub r3, r3, r4 @ get offset between virt&phys
add r5, r5, r3 @ convert virt addresses to
add r6, r6, r3 @ physical address space
/*
* Walk the entries from r5 up to (not including) r6. Each entry starts
* with a value/mask pair; the entry matches when (cpuid & mask) == value.
*/
1: ldmia r5, {r3, r4} @ value, mask
and r4, r4, r9 @ mask wanted bits
teq r3, r4
beq 2f
add r5, r5, #PROC_INFO_SZ @ sizeof(proc_info_list)
cmp r5, r6
blo 1b
/* No entry matched: report failure with r5 = 0. */
mov r5, #0 @ unknown processor
2: ret lr
ENDPROC(__lookup_processor_type)
__vet_atags
/* file: arch/arm/kernel/head-common.S */
/* Determine validity of the r2 atags pointer. The heuristic requires
* that the pointer be aligned, in the first 16k of physical RAM and
* that the ATAG_CORE marker is first and present. If CONFIG_OF_FLATTREE
* is selected, then it will also accept a dtb pointer. Future revisions
* of this function may be more lenient with the physical address and
* may also be able to move the ATAGS block if necessary.
*
* Returns:
* r2 either valid atags pointer, valid dtb pointer, or zero
* r5, r6 corrupted
*/
__vet_atags:
/* Reject pointers that are not 4-byte aligned. */
tst r2, #0x3 @ aligned?
bne 1f
/* r5 = first word at the pointer. */
ldr r5, [r2, #0]
#ifdef CONFIG_OF_FLATTREE
/* A first word equal to OF_DT_MAGIC is accepted as a dtb right away. */
ldr r6, =OF_DT_MAGIC @ is it a DTB?
cmp r5, r6
beq 2f
#endif
/*
* Otherwise the first word must equal ATAG_CORE_SIZE or
* ATAG_CORE_SIZE_EMPTY, and the second word must equal ATAG_CORE.
*/
cmp r5, #ATAG_CORE_SIZE @ is first tag ATAG_CORE?
cmpne r5, #ATAG_CORE_SIZE_EMPTY
bne 1f
ldr r5, [r2, #4]
ldr r6, =ATAG_CORE
cmp r5, r6
bne 1f
2: ret lr @ atag/dtb pointer is ok
/* Validation failed: return with r2 = 0. */
1: mov r2, #0
ret lr
ENDPROC(__vet_atags)
__mmap_switched
/* file: arch/arm/kernel/head-common.S */
/*
* The following fragment of code is executed with the MMU on in MMU mode,
* and uses absolute addresses; this is not position independent.
*
* r0 = cp#15 control register
* r1 = machine ID
* r2 = atags/dtb pointer
* r9 = processor ID
*
* Ends with a branch to start_kernel and does not return to its caller.
*/
__INIT
__mmap_switched:
/*
* r4-r7 = first four words of __mmap_switched_data; the write-back form
* leaves r3 pointing at the word after them.
*/
adr r3, __mmap_switched_data
ldmia r3!, {r4, r5, r6, r7}
/*
* If r4 != r5, copy words from [r4] to [r5] until r5 reaches r6; the loop
* is skipped entirely when r4 == r5.
*/
cmp r4, r5 @ Copy data segment if needed
1: cmpne r5, r6
ldrne fp, [r4], #4
strne fp, [r5], #4
bne 1b
/* Store zero words from r6 up to (not including) r7. */
mov fp, #0 @ Clear BSS (and zero fp)
1: cmp r6, r7
strcc fp, [r6],#4
bcc 1b
/*
* Load the next words of the table: four destination addresses (r4-r7)
* and the initial stack pointer. The Thumb variant loads sp with a
* separate ldr from offset 16.
*/
ARM( ldmia r3, {r4, r5, r6, r7, sp})
THUMB( ldmia r3, {r4, r5, r6, r7} )
THUMB( ldr sp, [r3, #16] )
str r9, [r4] @ Save processor ID
str r1, [r5] @ Save machine type
str r2, [r6] @ Save atags pointer
/* The control register value is stored only when r7 is non-zero. */
cmp r7, #0
strne r0, [r7] @ Save control register values
b start_kernel
ENDPROC(__mmap_switched)
start_kernel
/* file: init/main.c */
/*
 * C entry point of the kernel, branched to from __mmap_switched.
 * Runs the one-time initialization steps below in a fixed order and
 * finishes by calling rest_init().
 */
asmlinkage __visible void __init start_kernel(void)
{
char *command_line;
char *after_dashes;
/*
* Need to run as early as possible, to initialize the
* lockdep hash:
*/
lockdep_init(); /* initialize the two lockdep hash tables */
set_task_stack_end_magic(&init_task); /* set the stack-end magic number used for stack overflow detection */
smp_setup_processor_id(); /* SMP (multi-core) related: set the processor ID */
debug_objects_early_init(); /* early debug-related initialization */
/*
* Set up the initial canary ASAP:
*/
boot_init_stack_canary(); /* stack overflow detection (canary) initialization */
cgroup_init_early(); /* early cgroup initialization; cgroups control Linux system resources */
local_irq_disable(); /* disable interrupts on the current CPU */
early_boot_irqs_disabled = true;
/*
* Interrupts are still disabled. Do necessary setups, then
* enable them
*/
boot_cpu_init(); /* boot-CPU related initialization */
page_address_init(); /* page-address related initialization */
pr_notice("%s", linux_banner); /* print the Linux version, build time, etc.; linux_banner is defined in init/version.c */
setup_arch(&command_line); /* Important: architecture-specific setup; parses the ATAGS or the dtb device tree */
mm_init_cpumask(&init_mm); /* memory-management related initialization */
setup_command_line(command_line); /* store the command-line arguments */
setup_nr_cpu_ids(); /* determine the number of CPU cores */
setup_per_cpu_areas(); /* set up the per-CPU areas */
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ /* judging by the name, preparation work for the boot CPU */
build_all_zonelists(NULL, NULL); /* build the memory zone lists */
page_alloc_init(); /* handle pages used for CPU hotplug */
pr_notice("Kernel command line: %s\n", boot_command_line); /* print the kernel command line */
parse_early_param(); /* parse the parameters registered with early_param() */
/* parse the remaining boot parameters; unknown ones are handed to unknown_bootoption */
after_dashes = parse_args("Booting kernel",
static_command_line, __start___param,
__stop___param - __start___param,
-1, -1, &unknown_bootoption);
if (!IS_ERR_OR_NULL(after_dashes))
parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
set_init_arg);
jump_label_init();
/*
* These use large bootmem allocations and must precede
* kmem_cache_init()
*/
setup_log_buf(0); /* set up the buffer used by the kernel log */
pidhash_init(); /* build the PID hash table for fast process lookup */
vfs_caches_init_early(); /* early VFS (virtual file system) initialization */
sort_main_extable(); /* sort the main kernel exception table */
trap_init(); /* initialize the system-reserved interrupt vectors */
mm_init(); /* memory management initialization */
/*
* Set up the scheduler prior starting any interrupts (such as the
* timer interrupt). Full topology setup happens at smp_init()
* time - but meanwhile we still have a functioning scheduler.
*/
sched_init(); /* initialize the scheduler, mainly its data structures */
/*
* Disable preemption - early bootup scheduling is extremely
* fragile until we cpu_idle() for the first time.
*/
preempt_disable(); /* disable preemption */
if (WARN(!irqs_disabled(),
"Interrupts were enabled *very* early, fixing it\n"))
local_irq_disable(); /* interrupts must be off here; disable them if they are not */
idr_init_cache(); /* IDR initialization; IDR is the kernel's integer ID management
* mechanism, associating an integer ID with a pointer. */
rcu_init(); /* initialize RCU (Read-Copy-Update) */
/* trace_printk() and trace points may be used after this */
trace_init(); /* tracing/debug related initialization */
context_tracking_init();
radix_tree_init(); /* radix tree initialization */
/* init some links before init_ISA_irqs() */
early_irq_init(); /* early interrupt initialization, mainly registering the irq_desc
* structures the kernel uses to describe an interrupt. */
init_IRQ(); /* interrupt initialization */
tick_init(); /* tick initialization */
rcu_init_nohz();
init_timers(); /* initialize timers */
hrtimers_init(); /* initialize high-resolution timers */
softirq_init(); /* softirq initialization */
timekeeping_init();
time_init(); /* initialize the system time */
sched_clock_postinit();
perf_event_init();
profile_init();
call_function_init();
WARN(!irqs_disabled(), "Interrupts were enabled early\n");
early_boot_irqs_disabled = false;
local_irq_enable(); /* enable interrupts */
kmem_cache_init_late(); /* late slab initialization; slab is a kernel memory allocator */
/*
* HACK ALERT! This is early. We're enabling the console before
* we've done PCI setups etc, and console_init() must be aware of
* this. But we do want output early, in case something goes wrong.
*/
console_init(); /* initialize the console; printk output issued before this point
* is only held in the log buffer and is not shown until the
* console has been initialized by this call. */
if (panic_later)
panic("Too many boot %s vars at `%s'", panic_later,
panic_param);
lockdep_info(); /* prints some information if CONFIG_LOCKDEP is defined */
/*
* Need to run this when irqs are enabled, because it wants
* to self-test [hard/soft]-irqs on/off lock inversion bugs
* too:
*/
locking_selftest(); /* locking self-test */
#ifdef CONFIG_BLK_DEV_INITRD
if (initrd_start && !initrd_below_start_ok &&
page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
page_to_pfn(virt_to_page((void *)initrd_start)),
min_low_pfn);
initrd_start = 0;
}
#endif
page_ext_init();
debug_objects_mem_init();
kmemleak_init(); /* kmemleak initialization; kmemleak detects memory leaks */
setup_per_cpu_pageset();
numa_policy_init();
if (late_time_init)
late_time_init();
sched_clock_init();
calibrate_delay(); /* measure the BogoMIPS value, a rough indicator of CPU speed
* (a larger value suggests a faster CPU). */
pidmap_init(); /* PID bitmap initialization */
anon_vma_init(); /* create the anon_vma slab cache */
acpi_early_init();
#ifdef CONFIG_X86
if (efi_enabled(EFI_RUNTIME_SERVICES))
efi_enter_virtual_mode();
#endif
#ifdef CONFIG_X86_ESPFIX64
/* Should be run before the first non-init thread is created */
init_espfix_bsp();
#endif
thread_info_cache_init();
cred_init(); /* credentials initialization */
fork_init(); /* initialize the structures needed by fork */
proc_caches_init(); /* allocate caches for various resource-management structures */
buffer_init(); /* initialize the buffer cache */
key_init(); /* initialize keys */
security_init(); /* security related initialization */
dbg_late_init();
vfs_caches_init(totalram_pages); /* create the VFS caches */
signals_init(); /* initialize signals */
/* rootfs populating might need page-writeback */
page_writeback_init(); /* page writeback initialization */
proc_root_init(); /* register and mount the proc file system */
nsfs_init();
cpuset_init(); /* initialize cpuset; cpuset groups CPU and memory resources logically
* and hierarchically and is one of the subsystems used by cgroup */
cgroup_init(); /* initialize cgroup */
taskstats_init_early(); /* early task statistics initialization */
delayacct_init();
check_bugs(); /* check for write-buffer coherency bugs */
acpi_subsystem_init();
sfi_init_late();
if (efi_enabled(EFI_RUNTIME_SERVICES)) {
efi_late_init();
efi_free_boot_services();
}
ftrace_init();
/* Do the rest non-__init'ed, we're now alive */
rest_init(); /* Important: creates the init kernel thread */
}
rest_init
/* file: init/main.c */
/*
 * Final stage of start_kernel(): spawns the kernel_init and kthreadd
 * kernel threads, signals kthreadd_done, and then enters
 * cpu_startup_entry() on the calling (boot) thread.
 */
static noinline void __init_refok rest_init(void)
{
int pid;
rcu_scheduler_starting(); /* tell RCU that the scheduler is starting */
smpboot_thread_init(); /* multi-core (SMP boot thread) related initialization */
/*
* We need to spawn init first so that it obtains pid 1, however
* the init task will end up wanting to create kthreads, which, if
* we schedule it before we create kthreadd, will OOPS.
*/
kernel_thread(kernel_init, NULL, CLONE_FS);
numa_default_policy();
/* Create kthreadd and record its task_struct in kthreadd_task. */
pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
rcu_read_lock();
kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
rcu_read_unlock();
/* Release kernel_init_freeable(), which waits on kthreadd_done. */
complete(&kthreadd_done);
/*
* The boot idle thread must execute schedule()
* at least once to get things moving:
*/
init_idle_bootup_task(current);
schedule_preempt_disabled();
/* Call into cpu_idle with preempt disabled */
cpu_startup_entry(CPUHP_ONLINE);
}
上面通过调用kernel_thread
函数创建了2个内核进程(通过ps -A
可以看到它们):
- kernel_init进程的PID为1,它通过运行根文件系统中的init程序完成内核态到用户态的转变;
- kthreadd进程的PID为2,负责所有内核线程的创建与管理。
最后,该函数属于主进程,它的PID为0,调用cpu_startup_entry
来进入idle进程,里面经过arch_cpu_idle_prepare
函数准备之后再调用cpu_idle_loop
函数循环,其他进程有事干的时候就要抢占idle进程夺取CPU的使用权。
kernel_init
/* file: init/main.c */
/*
 * Thread function of the init kernel thread created by rest_init().
 * Completes the remaining initialization via kernel_init_freeable(),
 * releases the init memory, and then tries to start the first user-space
 * program. Returns 0 when an init program was started; panics when none
 * could be run.
 */
static int __ref kernel_init(void *unused)
{
int ret;
kernel_init_freeable();
/* need to finish all async __init code before freeing the memory */
async_synchronize_full();
free_initmem();
mark_rodata_ro();
system_state = SYSTEM_RUNNING;
numa_default_policy();
flush_delayed_fput();
/*
 * First choice: ramdisk_execute_command. A failure here is only logged
 * and the candidates below are tried next.
 */
if (ramdisk_execute_command) {
ret = run_init_process(ramdisk_execute_command);
if (!ret)
return 0;
pr_err("Failed to execute %s (error %d)\n",
ramdisk_execute_command, ret);
}
/*
* We try each of these until one succeeds.
*
* The Bourne shell can be used instead of init if we are
* trying to recover a really broken machine.
*/
/* An explicitly requested execute_command is fatal if it fails. */
if (execute_command) {
ret = run_init_process(execute_command);
if (!ret)
return 0;
panic("Requested init %s failed (error %d).",
execute_command, ret);
}
/* Built-in fallbacks, tried in this order; the first success wins. */
if (!try_to_run_init_process("/sbin/init") ||
!try_to_run_init_process("/etc/init") ||
!try_to_run_init_process("/bin/init") ||
!try_to_run_init_process("/bin/sh"))
return 0;
panic("No working init found. Try passing init= option to kernel. "
"See Linux Documentation/init.txt for guidance.");
}
kernel_init_freeable
/* file: init/main.c */
/*
 * First part of kernel_init(): waits for kthreadd, brings up SMP, runs
 * do_basic_setup(), opens /dev/console as fd 0/1/2, and mounts the root
 * file system through prepare_namespace() when no early user-space init
 * is accessible.
 */
static noinline void __init kernel_init_freeable(void)
{
/*
* Wait until kthreadd is all set-up.
*/
wait_for_completion(&kthreadd_done); /* wait until the kthreadd thread is ready */
/* Now the scheduler is fully set up and can do blocking allocations */
gfp_allowed_mask = __GFP_BITS_MASK;
/*
* init can allocate pages on any node
*/
set_mems_allowed(node_states[N_MEMORY]);
/*
* init can run on any cpu.
*/
set_cpus_allowed_ptr(current, cpu_all_mask);
cad_pid = task_pid(current);
smp_prepare_cpus(setup_max_cpus);
do_pre_smp_initcalls();
lockup_detector_init();
smp_init(); /* multi-core (SMP) initialization */
sched_init_smp(); /* multi-core scheduler initialization */
do_basic_setup(); /* completes device-driver initialization; it calls driver_init(), which includes:
* devtmpfs_init();
* devices_init();
* buses_init();
* ...
*/
/* e.g. with "console=ttymxc0,115200" configured; the console opened here gets file descriptor 0 */
/* Open the /dev/console on the rootfs, this should never fail */
if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
pr_err("Warning: unable to open an initial console.\n");
(void) sys_dup(0); /* duplicate fd 0 so that fd 1 (stdout) also refers to /dev/console */
(void) sys_dup(0); /* duplicate again so that fd 2 (stderr) refers to it as well */
/*
* check if there is an early userspace init. If yes, let it do all
* the work
*/
if (!ramdisk_execute_command)
ramdisk_execute_command = "/init";
if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {
ramdisk_execute_command = NULL;
prepare_namespace(); /* mount the root file system */
}
/*
* Ok, we have completed the initial bootup, and
* we're essentially up and running. Get rid of the
* initmem segments and start the user-mode stuff..
*
* rootfs is available now, try loading the public keys
* and default modules
*/
integrity_load_keys();
load_default_modules();
}
driver_init
/* file: drivers/base/init.c */
/*
 * Initializes the driver-model core, then the pieces that depend on it,
 * in the fixed order shown below.
 */
void __init driver_init(void)
{
/* These are the core pieces */
devtmpfs_init();
devices_init();
buses_init();
classes_init();
firmware_init();
hypervisor_init();
/* These are also core pieces, but must come after the
* core core pieces.
*/
platform_bus_init();
cpu_dev_init();
memory_dev_init();
container_dev_init();
of_core_init();
}
prepare_namespace
/*
 * Selects and mounts the root file system, then mounts devtmpfs on "dev",
 * moves the current directory to "/" and chroots into it.
 */
void __init prepare_namespace(void)
{
int is_floppy;
/* Optional delay before mounting, taken when root_delay is non-zero. */
if (root_delay) {
printk(KERN_INFO "Waiting %d sec before mounting root device...\n",
root_delay);
ssleep(root_delay);
}
/*
* wait for the known devices to complete their probing
*
* Note: this is a potential source of long boot delays.
* For example, it is not atypical to wait 5 seconds here
* for the touchpad of a laptop to initialize.
*/
wait_for_device_probe();
md_run_setup();
if (saved_root_name[0]) {
root_device_name = saved_root_name;
/*
 * Names starting with "mtd" or "ubi" are mounted directly by name,
 * skipping the ROOT_DEV lookup below.
 */
if (!strncmp(root_device_name, "mtd", 3) ||
!strncmp(root_device_name, "ubi", 3)) {
mount_block_root(root_device_name, root_mountflags);
goto out;
}
ROOT_DEV = name_to_dev_t(root_device_name);
/* Strip a leading "/dev/" from the name. */
if (strncmp(root_device_name, "/dev/", 5) == 0)
root_device_name += 5;
}
if (initrd_load())
goto out;
/* wait for any asynchronous scanning to complete */
if ((ROOT_DEV == 0) && root_wait) {
printk(KERN_INFO "Waiting for root device %s...\n",
saved_root_name);
/* Poll every 100 ms until probing is done and the name resolves. */
while (driver_probe_done() != 0 ||
(ROOT_DEV = name_to_dev_t(saved_root_name)) == 0)
msleep(100);
async_synchronize_full();
}
is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR;
if (is_floppy && rd_doload && rd_load_disk(0))
ROOT_DEV = Root_RAM0;
mount_root();
out:
/* Common tail for every path above. */
devtmpfs_mount("dev");
sys_mount(".", "/", NULL, MS_MOVE, NULL);
sys_chroot(".");
}