linux的启动框架如下:
linux的内核映像文件zImage本身是一个压缩的文件,在arch/arm/boot/Makefile中,我们可以找到如下语句:
# zImage depends on compressed/vmlinux and is regenerated through the
# "objcopy" command (wrapped in if_changed); a message is echoed when done.
$(obj)/zImage: $(obj)/compressed/vmlinux FORCE
$(call if_changed,objcopy)
@echo ' Kernel: $@ is ready'
可见,zImage 是由 arch/arm/boot/compressed/vmlinux 经 objcopy 转换成二进制文件而来。在 arch/arm/boot/compressed/Makefile 中,有如下语句:
# compressed/vmlinux is linked (command "ld") from the linker script, the
# $(HEAD) object, the piggy object, $(OBJS) and $(lib1funcs).
$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \
$(addprefix $(obj)/, $(OBJS)) $(lib1funcs) FORCE
$(call if_changed,ld)
# piggy.$(suffix_y) is produced from ../Image by the command named $(suffix_y).
$(obj)/piggy.$(suffix_y): $(obj)/../Image FORCE
$(call if_changed,$(suffix_y))
# The piggy object depends on piggy.$(suffix_y); no recipe is shown in this excerpt.
$(obj)/piggy.$(suffix_y).o: $(obj)/piggy.$(suffix_y) FORCE
由此可以分析得知,zImage 是由 head.o、misc.o 以及压缩的内核 piggy.gzip.o 按照链接脚本 vmlinux.lds 链接而成。也就是说,zImage 文件既包含了未压缩部分,如 head.o、misc.o,也包含了压缩的部分,如 piggy.gzip.o。那么,zImage 的程序入口在哪里呢?
在 arch/arm/boot/compressed/vmlinux.lds 中,可以看到其框架如下:
/* Outline of the decompressor's linker script; "……" marks parts elided by the article. */
OUTPUT_ARCH(arm)
/* The program entry point is the symbol _start, defined below. */
ENTRY(_start)
SECTIONS
{
……
/* Link at address 0. */
. = 0;
_text = .;
.text : {
/* _start marks the very beginning of .text ... */
_start = .;
/* ... and .start input sections are placed first, ahead of ordinary .text. */
*(.start)
*(.text)
*(.text.*)
*(.fixup)
*(.gnu.warning)
*(.rodata)
*(.rodata.*)
*(.glue_7)
*(.glue_7t)
/* NOTE(review): presumably the compressed kernel payload from the piggy object -- confirm. */
*(.piggydata)
. = ALIGN(4);
}
……
}
可以看出,其入口点为 _start,并且 .start 段被放在 .text 的最前面,因此 arch/arm/boot/compressed/head.S 中的代码会首先被执行。在 head.S 中,程序执行了一堆准备工作后,开始调用 misc.c 中的 decompress_kernel 函数,开始解压内核。其函数如下:
/*
 * decompress_kernel - decompress the embedded kernel image.
 *
 * @output_start:        address where the decompressed data is written
 *                       (stored in output_data)
 * @free_mem_ptr_p:      stored in free_mem_ptr
 * @free_mem_ptr_end_p:  stored in free_mem_end_ptr
 * @arch_id:             stored in __machine_arch_type
 *
 * Records its arguments in file-scope variables, performs the
 * arch-specific decompressor setup, then runs do_decompress() over
 * [input_data, input_data_end) and reports the outcome.
 */
void
decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p,
unsigned long free_mem_ptr_end_p,
int arch_id)
{
int ret;
/* Save the parameters in globals before anything else runs. */
output_data = (unsigned char *)output_start;
free_mem_ptr = free_mem_ptr_p;
free_mem_end_ptr = free_mem_ptr_end_p;
__machine_arch_type = arch_id;
/* Arch-specific setup is done before the first putstr() call. */
arch_decomp_setup();
putstr("Uncompressing Linux...");
/* Input length is the distance between the two input markers; error() is passed as the error callback. */
ret = do_decompress(input_data, input_data_end - input_data,
output_data, error);
/* A non-zero return value is treated as failure. */
if (ret)
error("decompressor returned an error");
else
putstr(" done, booting the kernel.\n");
}
这里有几句经典的打印信息:
Uncompressing Linux... done, booting the kernel.
执行完这个后,程序会先跳转到解压后内核的入口(arch/arm/kernel/head.S),完成与体系结构相关的早期初始化,然后跳到 init/main.c 中,执行经典的 start_kernel 函数。
start_kernel() 会调用一系列初始化函数来设置中断,执行进一步的内存配置等,其函数定义如下:
/*
 * start_kernel - main kernel initialization routine.
 *
 * Runs a fixed sequence of subsystem initializers. Interrupts are
 * disabled on the current CPU near the top, re-enabled after the
 * timer/IRQ setup, and the function finally hands control to
 * rest_init(). The order of the calls below is significant.
 */
asmlinkage void __init start_kernel(void)
{
char * command_line;
extern const struct kernel_param __start___param[], __stop___param[];
smp_setup_processor_id(); /* Per the article: determines the ID of the booting CPU; does nothing on a single-core system */
/*
* Need to run as early as possible, to initialize the
* lockdep hash:
*/
lockdep_init();
debug_objects_early_init();
/*
* Set up the initial canary ASAP:
*/
boot_init_stack_canary();
cgroup_init_early();
local_irq_disable(); /* Disable interrupts on the current CPU */
early_boot_irqs_disabled = true;
/*
* Interrupts are still disabled. Do necessary setups, then
* enable them
*/
tick_init();
boot_cpu_init();
page_address_init(); /* Per the article: initializes page addresses, chaining them together with linked lists */
printk(KERN_NOTICE "%s", linux_banner); /* Print the kernel version banner */
setup_arch(&command_line); /* Architecture-specific setup; per the article, the architecture is selected by the top-level Makefile */
mm_init_owner(&init_mm, &init_task);
mm_init_cpumask(&init_mm);
setup_command_line(command_line);
setup_nr_cpu_ids();
setup_per_cpu_areas();
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
build_all_zonelists(NULL);
page_alloc_init();
printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); /* Print the kernel command line */
parse_early_param(); /* Parse kernel options */
parse_args("Booting kernel", static_command_line, __start___param,
__stop___param - __start___param,
&unknown_bootoption);
/*
* These use large bootmem allocations and must precede
* kmem_cache_init()
*/
setup_log_buf(0);
pidhash_init();
vfs_caches_init_early();
sort_main_extable();
trap_init();
mm_init();
/*
* Set up the scheduler prior starting any interrupts (such as the
* timer interrupt). Full topology setup happens at smp_init()
* time - but meanwhile we still have a functioning scheduler.
*/
sched_init(); /* Initialize the process scheduler */
/*
* Disable preemption - early bootup scheduling is extremely
* fragile until we cpu_idle() for the first time.
*/
preempt_disable(); /* Disable kernel preemption */
if (!irqs_disabled()) { /* Interrupts are unexpectedly enabled: warn and disable them again */
printk(KERN_WARNING "start_kernel(): bug: interrupts were "
"enabled *very* early, fixing it\n");
local_irq_disable();
}
idr_init_cache();
perf_event_init();
rcu_init(); /* Initialize the RCU (Read-Copy Update) mechanism */
radix_tree_init();
/* init some links before init_ISA_irqs() */
early_irq_init();
init_IRQ(); /* Initialize the interrupt vectors */
prio_tree_init();
init_timers(); /* Initialize timer-related data structures */
hrtimers_init(); /* Initialize high-resolution timers */
softirq_init(); /* Initialize tasklet_softirq and hi_softirq */
timekeeping_init();
time_init(); /* Initialize the system clock source */
profile_init(); /* Initialize kernel profiling (a kernel performance debugging tool) */
call_function_init();
/* Interrupts must still be off here; only complain if they are not. */
if (!irqs_disabled())
printk(KERN_CRIT "start_kernel(): bug: interrupts were "
"enabled early\n");
early_boot_irqs_disabled = false;
local_irq_enable();
/* Interrupts are enabled now so all GFP allocations are safe. */
gfp_allowed_mask = __GFP_BITS_MASK;
kmem_cache_init_late();
/*
* HACK ALERT! This is early. We're enabling the console before
* we've done PCI setups etc, and console_init() must be aware of
* this. But we do want output early, in case something goes wrong.
*/
console_init(); /* Initialize the console so printk output is shown; printk calls before this point only store data in the buffer */
/* A panic request recorded earlier is raised only now, after console_init(). */
if (panic_later)
panic(panic_later, panic_param);
lockdep_info();
/*
* Need to run this when irqs are enabled, because it wants
* to self-test [hard/soft]-irqs on/off lock inversion bugs
* too:
*/
locking_selftest();
#ifdef CONFIG_BLK_DEV_INITRD
/* Disable the initrd if its start page lies below min_low_pfn. */
if (initrd_start && !initrd_below_start_ok &&
page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
"disabling it.\n",
page_to_pfn(virt_to_page((void *)initrd_start)),
min_low_pfn);
initrd_start = 0;
}
#endif
page_cgroup_init();
enable_debug_pagealloc();
debug_objects_mem_init();
kmemleak_init();
setup_per_cpu_pageset();
numa_policy_init();
/* late_time_init is an optional hook; call it only when set. */
if (late_time_init)
late_time_init();
sched_clock_init();
calibrate_delay();
pidmap_init();
anon_vma_init();
#ifdef CONFIG_X86
if (efi_enabled)
efi_enter_virtual_mode();
#endif
thread_info_cache_init();
cred_init();
fork_init(totalram_pages);
proc_caches_init();
buffer_init();
key_init();
security_init();
dbg_late_init();
vfs_caches_init(totalram_pages); /* Initialize the virtual file system */
signals_init();
/* rootfs populating might need page-writeback */
page_writeback_init();
#ifdef CONFIG_PROC_FS
proc_root_init();
#endif
cgroup_init();
cpuset_init();
taskstats_init_early();
delayacct_init();
check_bugs();
acpi_early_init(); /* before LAPIC and SMP init */
sfi_init_late();
ftrace_init();
/* Do the rest non-__init'ed, we're now alive */
rest_init();
}
start_kernel
函数大致执行任务如下:
输出Linux版本信息,设置与体系结构相关的环境,页表结构初始化,初始化系统IRQ,核心进程调度器初始化,时间、定时器初始化,提取并分析核心启动参数,控制台初始化,剖析器数据结构初始化,核心Cache初始化,延迟校准,内存初始化,创建文件,目录cache,创建与虚拟内存相关的cache,块设备读写缓冲区初始化,创建页cache,创建信号队列cache,初始化内存inode表,创建内存文件描述符表,检查体系结构,SMP机器除引导CPU之外的CPU初始化,创建第一个核心线程,调用init函数,调用cpu_idle()等待调度。
作为核心线程的init()函数完成外设及其驱动程序的加载和初始化,挂接根文件系统。init()打开/dev/console设备,重定向stdin、stdout和stderr到控制台。之后,它搜索文件系统中的init程序(也可以由“init=”命令行参数指定init程序),并使用execve()系统调用执行init程序。搜索init程序的顺序为/sbin/init、/etc/init、/bin/init 和/bin/sh。在嵌入式系统中,多数情况下,可以给内核传入一个简单的shell脚本来启动必需的嵌入式应用程序。
至此,漫长的 Linux 内核引导和启动过程就结束了,而由 start_kernel() 创建的第一个线程(即 init() 对应的线程)也进入用户模式。