本文共分为 5 部分。第 5 部分是手工绘制的内核启动函数调用关系图, 可结合函数名和文件路径,
清晰地分析内核启动路线。
(1). 文件 arch/arm/kernel/vmlinux.lds.S 是 kernel 的链接脚本文件
/* The entry symbol of the linked image is stext. */
ENTRY(stext)
SECTIONS
{
/* First output section: _text marks its start, then the HEAD_TEXT inputs. */
.head.text : {
_text = .;
HEAD_TEXT
}
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
IDMAP_TEXT
/* __exception_text_start/_end bracket the .exception.text input sections. */
__exception_text_start = .;
*(.exception.text)
__exception_text_end = .;
IRQENTRY_TEXT
TEXT_TEXT
SCHED_TEXT
LOCK_TEXT
KPROBES_TEXT
*(.gnu.warning)
*(.glue_7)
*(.glue_7t)
/* Align the location counter to 4 bytes before placing .got. */
. = ALIGN(4);
*(.got) /* Global offset table */
ARM_CPU_KEEP(PROC_INFO)
}
}
(2). 文件 arch/arm/kernel/head.S 是 kernel 的启动文件
/*
 * stext: kernel entry point, emitted into the ".head.text" section by __HEAD.
 *
 * Visible sequence in this excerpt:
 *   - force SVC mode with all interrupts masked;
 *   - read the processor id and look up its procinfo, branching to
 *     __error_p when the lookup leaves r5 == 0;
 *   - call __vet_atags and the optional __fixup_smp / __fixup_pv_table;
 *   - call __create_page_tables;
 *   - load r13 with __mmap_switched (where execution continues once the
 *     MMU is on) and point lr at the local label that branches to
 *     __enable_mmu;
 *   - jump to the processor init function located through
 *     PROCINFO_INITFUNC relative to r10.
 */
__HEAD ;// #define __HEAD .section ".head.text","ax"
ENTRY(stext)
ARM_BE8(setend be ) @ ensure we are in BE8 mode
THUMB( badr r9, 1f ) @ Kernel is always entered in ARM.
THUMB( bx r9 ) @ If this is a Thumb-2 kernel,
THUMB( .thumb ) @ switch to Thumb now.
THUMB(1: )
#ifdef CONFIG_ARM_VIRT_EXT
bl __hyp_stub_install
#endif
@ ensure svc mode and all interrupts masked
safe_svcmode_maskall r9
mrc p15, 0, r9, c0, c0 @ get processor id
bl __lookup_processor_type @ r5=procinfo r9=cpuid
movs r10, r5 @ invalid processor (r5=0)?
THUMB( it eq ) @ force fixup-able long branch encoding
beq __error_p @ yes, error 'p'
#ifdef CONFIG_ARM_LPAE
mrc p15, 0, r3, c0, c1, 4 @ read ID_MMFR0
and r3, r3, #0xf @ extract VMSA support
cmp r3, #5 @ long-descriptor translation table format?
THUMB( it lo ) @ force fixup-able long branch encoding
blo __error_lpae @ only classic page table format
#endif
/* ... */
bl __vet_atags
#ifdef CONFIG_SMP_ON_UP
bl __fixup_smp
#endif
#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
bl __fixup_pv_table
#endif
bl __create_page_tables
ldr r13, =__mmap_switched @ address to jump to after
@ mmu has been enabled (__mmap_switched is in head_common.S)
badr lr, 1f @ return (PIC) address
#ifdef CONFIG_ARM_LPAE
mov r5, #0 @ high TTBR0
mov r8, r4, lsr #12 @ TTBR1 is swapper_pg_dir pfn
#else
mov r8, r4 @ set TTBR1 to swapper_pg_dir
#endif
ldr r12, [r10, #PROCINFO_INITFUNC]
add r12, r12, r10
ret r12
1: b __enable_mmu
(3). 文件 arch/arm/kernel/head_common.S
/*
 * __mmap_switched: emitted into ".init.text" by __INIT.
 *
 * Loads a table of addresses from __mmap_switched_data, copies the data
 * segment if needed, zeroes BSS, then stores the processor ID (r9),
 * machine type (r1) and atags pointer (r2) through the loaded pointers;
 * the control register value (r0) is stored only when r7 is non-zero.
 * Finally branches to start_kernel.
 */
__INIT ;//#define __INIT .section ".init.text","ax"
__mmap_switched:
adr r3, __mmap_switched_data
ldmia r3!, {r4, r5, r6, r7}
cmp r4, r5 @ Copy data segment if needed
1: cmpne r5, r6
ldrne fp, [r4], #4
strne fp, [r5], #4
bne 1b
mov fp, #0 @ Clear BSS (and zero fp)
1: cmp r6, r7
strcc fp, [r6],#4
bcc 1b
ARM( ldmia r3, {r4, r5, r6, r7, sp})
THUMB( ldmia r3, {r4, r5, r6, r7} )
THUMB( ldr sp, [r3, #16] )
str r9, [r4] @ Save processor ID
str r1, [r5] @ Save machine type
str r2, [r6] @ Save atags pointer
cmp r7, #0
strne r0, [r7] @ Save control register values
b start_kernel ;// jump to start_kernel(void) in init/main.c
ENDPROC(__mmap_switched)
(4).文件 init/main.c
内核初始化程序
/*
 * start_kernel - C-level kernel initialization entry point.
 *
 * Reached by the branch at the end of __mmap_switched. Calls the
 * subsystem init routines in a fixed order: interrupts are disabled near
 * the top with local_irq_disable(), the early_boot_irqs_disabled flag is
 * cleared and local_irq_enable() is called after the IRQ/timer setup
 * calls, and the function finishes by calling rest_init().
 */
asmlinkage __visible void __init start_kernel(void)
{
char *command_line;
char *after_dashes;
/*
* Need to run as early as possible, to initialize the
* lockdep hash:
*/
lockdep_init();
set_task_stack_end_magic(&init_task);
smp_setup_processor_id();
debug_objects_early_init();
/*
* Set up the initial canary ASAP:
*/
boot_init_stack_canary();
cgroup_init_early();
local_irq_disable();
early_boot_irqs_disabled = true;
/*
* Interrupts are still disabled. Do necessary setups, then
* enable them
*/
boot_cpu_init();
page_address_init();
pr_notice("%s", linux_banner);
setup_arch(&command_line); /* author's note: handles the parameters handed over by U-Boot */
mm_init_cpumask(&init_mm);
setup_command_line(command_line); /** command line */
setup_nr_cpu_ids();
setup_per_cpu_areas();
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
build_all_zonelists(NULL, NULL);
page_alloc_init();
pr_notice("Kernel command line: %s\n", boot_command_line);
/* parameters may set static keys */
jump_label_init();
parse_early_param();
after_dashes = parse_args("Booting kernel",
static_command_line, __start___param,
__stop___param - __start___param,
-1, -1, NULL, &unknown_bootoption); /* author's note: handles the args passed by U-Boot */
if (!IS_ERR_OR_NULL(after_dashes))
parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
NULL, set_init_arg);
/*
* These use large bootmem allocations and must precede
* kmem_cache_init()
*/
setup_log_buf(0);
pidhash_init();
vfs_caches_init_early();
sort_main_extable();
trap_init();
mm_init();
/*
* Set up the scheduler prior starting any interrupts (such as the
* timer interrupt). Full topology setup happens at smp_init()
* time - but meanwhile we still have a functioning scheduler.
*/
sched_init();
/*
* Disable preemption - early bootup scheduling is extremely
* fragile until we cpu_idle() for the first time.
*/
preempt_disable();
if (WARN(!irqs_disabled(),
"Interrupts were enabled *very* early, fixing it\n"))
local_irq_disable();
idr_init_cache();
rcu_init();
/* trace_printk() and trace points may be used after this */
trace_init();
context_tracking_init();
radix_tree_init();
/* init some links before init_ISA_irqs() */
early_irq_init();
init_IRQ();
tick_init();
rcu_init_nohz();
init_timers();
hrtimers_init();
softirq_init();
timekeeping_init();
time_init();
sched_clock_postinit();
perf_event_init();
profile_init();
call_function_init();
WARN(!irqs_disabled(), "Interrupts were enabled early\n");
early_boot_irqs_disabled = false;
local_irq_enable();
kmem_cache_init_late();
/*
* HACK ALERT! This is early. We're enabling the console before
* we've done PCI setups etc, and console_init() must be aware of
* this. But we do want output early, in case something goes wrong.
*/
console_init();
if (panic_later)
panic("Too many boot %s vars at `%s'", panic_later,
panic_param);
lockdep_info();
/*
* Need to run this when irqs are enabled, because it wants
* to self-test [hard/soft]-irqs on/off lock inversion bugs
* too:
*/
locking_selftest();
#ifdef CONFIG_BLK_DEV_INITRD
if (initrd_start && !initrd_below_start_ok &&
page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
page_to_pfn(virt_to_page((void *)initrd_start)),
min_low_pfn);
initrd_start = 0;
}
#endif
page_ext_init();
debug_objects_mem_init();
kmemleak_init();
setup_per_cpu_pageset();
numa_policy_init();
if (late_time_init)
late_time_init();
sched_clock_init();
calibrate_delay();
pidmap_init();
anon_vma_init();
acpi_early_init();
#ifdef CONFIG_X86
if (efi_enabled(EFI_RUNTIME_SERVICES))
efi_enter_virtual_mode();
#endif
#ifdef CONFIG_X86_ESPFIX64
/* Should be run before the first non-init thread is created */
init_espfix_bsp();
#endif
thread_info_cache_init();
cred_init();
fork_init();
proc_caches_init();
buffer_init();
key_init();
security_init();
dbg_late_init();
vfs_caches_init();
signals_init();
/* rootfs populating might need page-writeback */
page_writeback_init();
proc_root_init();
nsfs_init();
cpuset_init();
cgroup_init();
taskstats_init_early();
delayacct_init();
check_bugs();
acpi_subsystem_init();
sfi_init_late();
if (efi_enabled(EFI_RUNTIME_SERVICES)) {
efi_late_init();
efi_free_boot_services();
}
ftrace_init();
/* Do the rest non-__init'ed, we're now alive */
rest_init();
}
(5). 内核启动调度路线
可参考函数名和路径查看调用关系.
start_kernel(void) // init/main.c
|
|-setup_arch(&command_line); // 解析 uboot 传递的启动参数 (ATAGS/设备树), 取得内核命令行 (即 uboot 的 bootargs, 而非 bootcmd)
|
|-parse_args(.....); // 处理uboot 传递 bootargs 参数
| unknown_bootoption
| obsolete_checksetup() // 处理命令参数 'init=' ...
|
|-rest_init(); // init/main.c
|
|-kernel_init(); //函数指针 kernel_thread(kernel_init, NULL, CLONE_FS);
| |
| |-kernel_init_freeable(); // init/main.c
| | | /* Wait until kthreadd is all set-up. */
| | |-wait_for_completion(&kthreadd_done);
| | | /* init can allocate pages on any node */
| | |-set_mems_allowed(node_states[N_MEMORY]);
| | | /* init can run on any cpu. */
| | |-set_cpus_allowed_ptr(current, cpu_all_mask);
| | |-cad_pid = task_pid(current);
| | |-smp_prepare_cpus(setup_max_cpus);
| | |- .... /* 省略部分代码 */
| | |-smp_init();
| | |-sched_init_smp();
| | |-page_alloc_init_late();
| | |
| | |-do_basic_setup(); // system 基础配置项
| | | | cpuset_init_smp(); // smp init
| | | | shmem_init(); // single
| | | |-driver_init(); // drivers/base/init.c 驱动初始化
| | | | |
| | | | |-devtmpfs_init(); /* 注册设备类型文件系统 */
| | | | | err = register_filesystem(&dev_fs_type);
| | | | | thread = kthread_run(devtmpfsd, &err, "kdevtmpfs");
| | | | | /* 运行 devtmpfsd() 函数 ,创建文件系统 */
| | | | |-devices_init();
| | | | | kset_create_and_add("devices", &device_uevent_ops, NULL);
| | | | | kobject_create_and_add("dev", NULL);
| | | | | kobject_create_and_add("block", dev_kobj);
| | | | | kobject_create_and_add("char", dev_kobj);
| | | | |
| | | | |-buses_init();
| | | | | kset_create_and_add("bus", &bus_uevent_ops, NULL);
| | | | | kset_create_and_add("system", NULL, &devices_kset->kobj);
| | | | |
| | | | |-classes_init();
| | | | | kset_create_and_add("class", NULL, NULL);
| | | | |
| | | | |-firmware_init();
| | | | | firmware_kobj = kobject_create_and_add("firmware", NULL);
| | | | |
| | | | |-hypervisor_init();
| | | | | kobject_create_and_add("hypervisor", NULL);
| | | | |
| | | | |-platform_bus_init();
| | | | | early_platform_cleanup();
| | | | | device_register(&platform_bus); // platform_dev 设备初始化,
| | | | | bus_register(&platform_bus_type); // platform_bus 初始化
| | | | | of_platform_register_reconfig_notifier();
| | | | |
| | | | |-cpu_dev_init();
| | | | | subsys_system_register(&cpu_subsys, cpu_root_attr_groups)
| | | | | cpu_dev_register_generic();
| | | | | cpu_register_vulnerabilities();
| | | | |
| | | | |-memory_dev_init();
| | | | | subsys_system_register(&memory_subsys,
| | | | | memory_root_attr_groups);
| | | | |-container_dev_init();
| | | | | subsys_system_register(&container_subsys, NULL);
| | | | |
| | | | |-of_core_init();
| | | | kset_create_and_add("devicetree", NULL, firmware_kobj);
| | | | proc_symlink("device-tree", NULL,
| | | | "/sys/firmware/devicetree/base");
| | | |-init_irq_proc();
| | | | proc_mkdir("irq", NULL);
| | | | register_irq_proc(irq, desc);
| | | |
| | | |-do_ctors();
| | | | usermodehelper_enable();
| | | |
| | | |-do_initcalls();
| | | | do_initcall_level(level);
| | | | for (fn = initcall_levels[level];
| | | | fn < initcall_levels[level+1]; fn++)
| | | | do_one_initcall(*fn);
| | | |
| | | |-random_int_secret_init();
| | | /* Open the /dev/console on the rootfs, this should never fail */
| | |-sys_open((const char __user *) "/dev/console", O_RDWR, 0)
| | | /* check if there is an early userspace init.*/
| | |-ramdisk_execute_command = "/init";
| | |-sys_access((const char __user *) ramdisk_execute_command, 0)
| | | /* If yes, let it do all the work */
| | |-prepare_namespace();
| | | mount_root(); /* 挂载文件系统,启动文件管理线程 */
| | | /* rootfs is available now, try loading the public keys*/
| | |-integrity_load_keys();
| | |-load_default_modules(); /* and default modules */
| | /* need to finish all async __init code before freeing the memory */
| |-async_synchronize_full();
| | /* ramdisk_execute_command 来自启动参数 'rdinit=' (默认为 "/init"); 启动参数 'init=' 设置的是 execute_command; 若两者均未指定或执行失败, 则依次尝试 '/sbin/init' 等默认路径 */
| |-ramdisk_execute_command = ''
| |-run_init_process(ramdisk_execute_command);
| |
| |-try_to_run_init_process("/sbin/init") /* 尝试执行该路径的 init 可执行程序 */
| |-try_to_run_init_process("/etc/init") /* 上一个失败时, 尝试执行该路径的 init 程序 */
| |-try_to_run_init_process("/bin/init") /* 上一个失败时, 尝试执行该路径的 init 程序 */
| |-try_to_run_init_process("/bin/sh") /* 以上均失败时, 最后尝试启动 shell */
| /* Setup a clean context for our children to inherit. create kthreadd */
|-pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
|
|-kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
|
|-complete(&kthreadd_done);
| /*The boot idle thread must execute schedule(),
| * at least once to get things moving */
|-init_idle_bootup_task(current);
|
|-schedule_preempt_disabled();
| /* Call into cpu_idle with preempt disabled */
|-cpu_startup_entry(CPUHP_ONLINE);
|
|-arch_cpu_idle_prepare();
|
|-cpu_idle_loop(); /* Linux kernel tick thread, idle thread */