内核启动分析
- BIOS/UEFI阶段:计算机开机后首先会执行BIOS或UEFI程序,进行一些硬件初始化操作,如检查硬件配置信息、启动自检程序、加载操作系统引导程序等。
- Bootloader阶段:BIOS/UEFI会加载引导程序,比如GRUB等,这个引导程序会在屏幕上显示一个菜单,供用户选择要启动的操作系统,如果只有一个操作系统,那么该引导程序将自动启动内核。引导程序会根据用户选择或默认设置找到内核映像文件,加载到内存中。
- 内核启动自解压阶段:内核启动时,它首先会解压缩自身,然后进行一系列的初始化工作,如初始化CPU、内存管理、设备管理、文件系统等。其中,内存管理是最重要的一步,因为内核需要将系统中的所有可用内存映射到自己的地址空间中。
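- 内核引导阶段:从汇编入口(arch/arm64/kernel/head.S 的 stext)开始,保存启动参数、设置异常级别、创建页表并使能MMU,随后跳转到C语言入口 start_kernel(init/main.c)完成各子系统的初始化。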
- init进程启动阶段:当内核初始化完毕后,会启动init进程。在Linux系统中,init进程是用户空间中的第一个进程,它负责初始化系统环境,包括加载配置文件、启动系统服务等。
- 过渡到rootfs
跟踪线索
arch/arm64/kernel/head.S 入口
init/main.c start_kernel
kernel中敲make后会生成System.map,相关的函数名和地址都放里面了
通过地址追踪到函数所在的文件,使用交叉编译工具链的addr2line工具(用法见下文)
//arch/arm64/kernel/vmlinux.lds.S 链接脚本:指定程序在代码段数据段中如何分配
通过它知道arm64内核的第一条指令位于_text
//第一条指令位置
{//System.map 函数 列表
ffffff8008080000 T _text //第一条指令
ffffff8008081800 T vectors //异常向量表
ffffff80096007f4 T start_kernel //内核初始化
}
//通过地址追踪到函数所在的文件,使用交叉编译工具链的addr2line工具
//vmlinux是所有调试信息还在的内核镜像,也在kernel-4.9目录下
$ aarch64-linux-gnu-addr2line ffffff8008080000 -e vmlinux -f /*查找地址对应的文件位置
/home/yhai/kernel-4.9/arch/arm64/kernel/head.S:83 //这个文件的83行
$ aarch64-linux-gnu-addr2line ffffff8008081800 -e vmlinux -f
vectors
/home/yhai/kernel-4.9/arch/arm64/kernel/entry.S:393
$ aarch64-linux-gnu-addr2line ffffff80096007f4 -e vmlinux -f
start_kernel
/home/yhai/kernel-4.9/init/main.c:482
*/
{//arch/arm64/kernel/vmlinux.lds.S 链接脚本:指定程序在代码段数据段中如何分配
21 OUTPUT_ARCH(aarch64)
22 ENTRY(_text) //入口地址
110 SECTIONS
111 {
130 .head.text : {
131 _text = .; //代码段链接时,第一条指令的位置
132 HEAD_TEXT
133 }
134 .text : { /* Real text segment */
135 _stext = .; /* Text and read-only data */
136 __exception_text_start = .;
137 *(.exception.text)
138 __exception_text_end = .;
139 IRQENTRY_TEXT
140 SOFTIRQENTRY_TEXT
141 ENTRY_TEXT
}
}
}
入口(汇编部分)
//arch/arm64/kernel/head.S 入口
__HEAD
_head:
//对应前面 ffffff8008080000 T _text
83 b stext // branch to kernel start, magic
ENTRY(stext)
bl preserve_boot_args //保存u-boot传入的启动参数
bl el2_setup //异常级别(特权级)设置:若启动于EL2,先完成EL2配置,无VHE时再降到EL1;若启动于EL1则直接返回
//Drop to EL1, w0=cpu_boot_mode
bl set_cpu_boot_mode_flag // 按 ctrl+] 会跳到定义处, 按ctrl+o 会返回原位置
bl __create_page_tables //创建内存映射表
bl __cpu_setup //初始化CPU(配置访问权限,内存地址划分)
//查找源码: vscode -> arch/arm64 -> 右键 在文件夹中查找 __cpu_setup -> mm/proc.S
b __primary_switch //主切换
ENDPROC(stext)
1.保存传入的启动参数
/*
* Preserve the arguments passed by the bootloader in x0 .. x3
*/
preserve_boot_args:
mov x21, x0 // x21=FDT (X0里存放的是u-boot传入的设备树dtb的首地址)
adr_l x0, boot_args // record the contents of
stp x21, x1, [x0] // x0 .. x3 at kernel entry
stp x2, x3, [x0, #16]
dmb sy // needed before dc ivac with
// MMU off
add x1, x0, #0x20 // 4 x 8 bytes
b __inval_cache_range // tail call
ENDPROC(preserve_boot_args)
2.异常级别(特权)设置
//el2_setup: 若当前处于EL2,则进行虚拟化相关设置(HCR、通用定时器、GIC等);返回时用w0保存CPU启动时的异常级别(BOOT_CPU_MODE_EL1或BOOT_CPU_MODE_EL2)
ENTRY(el2_setup)
msr SPsel, #1 // We want to use SP_EL{1,2}
mrs x0, CurrentEL //读取当前异常级别
cmp x0, #CurrentEL_EL2 //判断是EL2否
b.ne 1f //不是则跳到1f标号处
mrs x0, sctlr_el2
msr sctlr_el2, x0
b 2f
1: mrs x0, sctlr_el1
msr sctlr_el1, x0
mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1
isb
ret
2:
#ifdef CONFIG_ARM64_VHE
/*
* Check for VHE being present. For the rest of the EL2 setup,
* x2 being non-zero indicates that we do have VHE, and that the
* kernel is intended to run at EL2.
*/
mrs x2, id_aa64mmfr1_el1
ubfx x2, x2, #8, #4
#else
mov x2, xzr
#endif
/* Hyp configuration. 虚拟化的配置*/
mov_q x0, HCR_HOST_NVHE_FLAGS
cbz x2, set_hcr
mov_q x0, HCR_HOST_VHE_FLAGS
set_hcr:
msr hcr_el2, x0
isb
/* Generic timers. 通用定时器设置*/
mrs x0, cnthctl_el2
orr x0, x0, #3 // Enable EL1 physical timers
msr cnthctl_el2, x0
msr cntvoff_el2, xzr // Clear virtual offset
#ifdef CONFIG_ARM_GIC_V3
/* GICv3 system register access 中断系统系统寄存器访问*/
mrs x0, id_aa64pfr0_el1
ubfx x0, x0, #24, #4
cbz x0, 3f
mrs_s x0, ICC_SRE_EL2
orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1
orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1
msr_s ICC_SRE_EL2, x0
isb // Make sure SRE is now set
mrs_s x0, ICC_SRE_EL2 // Read SRE back,
tbz x0, #0, 3f // and check that it sticks
msr_s ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults
3:
#endif
/* Populate ID registers. */
mrs x0, midr_el1
mrs x1, mpidr_el1
msr vpidr_el2, x0
msr vmpidr_el2, x1
/*
* When VHE is not in use, early init of EL2 and EL1 needs to be
* done here.
* When VHE _is_ in use, EL1 will not be used in the host and
* requires no configuration, and all non-hyp-specific EL2 setup
* will be done via the _EL1 system register aliases in __cpu_setup.
*/
cbnz x2, 1f
/* sctlr_el1 */
mov x0, #0x0800 // Set/clear RES{1,0} bits
msr sctlr_el1, x0
/* Coprocessor traps. */
mov x0, #0x33ff
msr cptr_el2, x0 // Disable copro. traps to EL2
1:
/* EL2 debug */
mrs x0, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer
sbfx x0, x0, #8, #4
cmp x0, #1
b.lt 4f // Skip if no PMU present
mrs x0, pmcr_el0 // Disable debug access traps
ubfx x0, x0, #11, #5 // to EL2 and allow access to
4:
csel x0, xzr, x0, lt // all PMU counters from EL1
msr mdcr_el2, x0 // (if they exist)
/* Stage-2 translation */
msr vttbr_el2, xzr
cbz x2, install_el2_stub
mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
isb
ret
install_el2_stub:
/* Hypervisor stub */
adrp x0, __hyp_stub_vectors
add x0, x0, #:lo12:__hyp_stub_vectors
msr vbar_el2, x0 //设置异常向量表 基地址
/* spsr */
mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
msr spsr_el2, x0
msr elr_el2, lr
mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
eret
ENDPROC(el2_setup)
3.设置CPU启动时的异常级别标志
//__boot_cpu_mode(全局变量,保存前面el2_setup通过w0返回的启动异常级别)
/*
* Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
* in w0. See arch/arm64/include/asm/virt.h for more info.
*/
set_cpu_boot_mode_flag:
adr_l x1, __boot_cpu_mode
cmp w0, #BOOT_CPU_MODE_EL2
b.ne 1f
add x1, x1, #4
1: str w0, [x1] // This CPU has booted in EL1
dmb sy
dc ivac, x1 // Invalidate potentially stale cache line
ret
ENDPROC(set_cpu_boot_mode_flag)
4.CPU初始化
//arch/arm64/mm/proc.S cpu初始化(__cpu_setup): 配置访问权限,内存地址划分
ENTRY(__cpu_setup)
tlbi vmalle1 // Invalidate local TLB
dsb nsh
mov x0, #3 << 20
msr cpacr_el1, x0 // Enable FP/ASIMD
mov x0, #1 << 12 // Reset mdscr_el1 and disable
msr mdscr_el1, x0 // access to the DCC from EL0
msr tcr_el1, x10
ret // return to head.S
ENDPROC(__cpu_setup)
5.主切换
//主切换(primary_switch): 使能MMU, 准备0号进程和内核栈,设置异常向量 跳转到start_kernel
__primary_switch:
#ifdef CONFIG_RANDOMIZE_BASE
mov x19, x0 // preserve new SCTLR_EL1 value
mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value
#endif
bl __enable_mmu /*使能MMU(内存管理单元) -> 实现虚拟地址到实际物理地址的映射
本质: 查表映射
用途:不连续变为连续,扩大地址范围
*/
#ifdef CONFIG_RELOCATABLE
bl __relocate_kernel //内核重定位(对应mmu开启后虚拟地址空间)
#endif
ldr x8, =__primary_switched
adrp x0, __PHYS_OFFSET
br x8
ENDPROC(__primary_switch)
/*
* The following fragment of code is executed with the MMU enabled.
*
* x0 = __PHYS_OFFSET
*/
__primary_switched:
adrp x4, init_thread_union
add sp, x4, #THREAD_SIZE
adr_l x5, init_task //初始化内核任务(准备的0号线程/进程和内核栈)
msr sp_el0, x5 // Save thread_info
//异常向量表设置
adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address
isb
stp xzr, x30, [sp, #-16]!
mov x29, sp
str_l x21, __fdt_pointer, x5 // Save FDT pointer(指向设备树dtb)
ldr_l x4, kimage_vaddr // Save the offset between
sub x4, x4, x0 // the kernel virtual and
str_l x4, kimage_voffset, x5 // physical mappings
b start_kernel //跳转到内核C 语言部分 init/main.c start_kernel (启动内核初始化)
ENDPROC(__primary_switched)
内核初始化(C部分)
//start_kernel: 内核启动初始化
asmlinkage __visible void __init start_kernel(void)
{
char *command_line;
char *after_dashes;
set_task_stack_end_magic(&init_task);
//日志: Booting Linux on physical CPU 0x0
smp_setup_processor_id(); // -> arch/arm64/kernel/setup.c 多核CPU ID的设置
debug_objects_early_init();
/*
* Set up the the initial canary ASAP:
*/
boot_init_stack_canary();
cgroup_init_early();
local_irq_disable();
early_boot_irqs_disabled = true;
/*
* Interrupts are still disabled. Do necessary setups, then
* enable them
*/
boot_cpu_init();
page_address_init();
//日志: Linux version 4.9.253-tegra
pr_notice("%s", linux_banner);
//日志:Boot CPU: AArch64 Processor [411fd071]
setup_arch(&command_line); //设置CPU体系架构 -> arch/arm64/kernel/setup.c
//vim跟踪: ctrl+] 跳转 ctrl+o 返回, :f 查看当前文件路径
mm_init_cpumask(&init_mm);
setup_command_line(command_line); //设置命令行参数(u-boot传入的参数)
setup_nr_cpu_ids();
setup_per_cpu_areas();
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
boot_cpu_hotplug_init();
build_all_zonelists(NULL, NULL, false);
page_alloc_init();
//日志: Kernel command line: tegraid=21.1.2.0.0 ddr_die=4096M@2048M section=512M memtype=0 vpr_resize usb_port_owner_info=0 lane_owner_info=0 emc_max_dvfs=0 touch_id=0@63 video=tegrafb no_console_suspend=1 console=ttyS0,115200n8 debug_uartport=lsport,4 earlyprintk=uart8250-32bit,0x70006000 maxcpus=4 usbcore.old_scheme_first=1 lp0_vec=0x1000@0xff780000 core_edp_mv=1075 core_edp_ma=4000 gpt earlycon=uart8250,mmio32,0x70006000 root=/dev/mmcblk0p1 rw rootwait rootfstype=ext4 console=ttyS0,115200n8 console=tty0 fbcon=map:0 net.ifnames=0 quiet root=/dev/mmcblk0p1 rw rootwait rootfstype=ext4 console=ttyS0,115200n8 console=tty0 fbcon=map:0 net.ifnames=0 "
pr_notice("Kernel command line: %s\n", boot_command_line); //命令行参数(u-boot传入)
/* parameters may set static keys */
jump_label_init();
parse_early_param();
after_dashes = parse_args("Booting kernel",
static_command_line, __start___param,
__stop___param - __start___param,
-1, -1, NULL, &unknown_bootoption);
if (!IS_ERR_OR_NULL(after_dashes))
parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
NULL, set_init_arg);
/*
* These use large bootmem allocations and must precede
* kmem_cache_init()
*/
setup_log_buf(0);
pidhash_init();
vfs_caches_init_early();
sort_main_extable();
trap_init(); //异常(陷阱)处理初始化
/*日志: Virtual kernel memory layout: 内存布局
[ 0.000000] modules : 0xffffff8000000000 - 0xffffff8008000000 ( 128 MB)
[ 0.000000] vmalloc : 0xffffff8008000000 - 0xffffffbebfff0000 ( 250 GB)
[ 0.000000] .text : 0xffffff8008080000 - 0xffffff8008f80000 ( 15360 KB)
[ 0.000000] .rodata : 0xffffff8008f80000 - 0xffffff8009610000 ( 6720 KB)
[ 0.000000] .init : 0xffffff8009610000 - 0xffffff8009e80000 ( 8640 KB)
[ 0.000000] .data : 0xffffff8009e80000 - 0xffffff800a163008 ( 2957 KB)
[ 0.000000] .bss : 0xffffff800a163008 - 0xffffff800a1fc0b4 ( 613 KB)
[ 0.000000] fixed : 0xffffffbefe7fd000 - 0xffffffbefec00000 ( 4108 KB)
[ 0.000000] PCI I/O : 0xffffffbefee00000 - 0xffffffbeffe00000 ( 16 MB)
[ 0.000000] vmemmap : 0xffffffbf00000000 - 0xffffffc000000000 ( 4 GB maximum)
[ 0.000000] 0xffffffbf00000000 - 0xffffffbf03fc8000 ( 63 MB actual)
[ 0.000000] memory : 0xffffffc000000000 - 0xffffffc0ff200000 ( 4082 MB)
*/
mm_init(); //内存管理初始化 -> mem_init() -> arch/arm64/mm/init.c
/*
* Set up the scheduler prior starting any interrupts (such as the
* timer interrupt). Full topology setup happens at smp_init()
* time - but meanwhile we still have a functioning scheduler.
*/
sched_init(); //内核调度初始化(多任务 并发调度)
/*
* Disable preemption - early bootup scheduling is extremely
* fragile until we cpu_idle() for the first time.
*/
preempt_disable(); //禁止抢占
if (WARN(!irqs_disabled(),
"Interrupts were enabled *very* early, fixing it\n"))
local_irq_disable();
idr_init_cache();
rcu_init();
/* trace_printk() and trace points may be used after this */
trace_init();
context_tracking_init();
radix_tree_init();
//日志: NR_IRQS:64 nr_irqs:64 0
/* init some links before init_ISA_irqs() */
early_irq_init(); //早期中断初始化: -> kernel/irq/irqdesc.c
//日志: /interrupt-controller@60004000: 192 interrupts
init_IRQ(); //中断初始化: arch/arm64/kernel/irq.c -> irqchip_init() -> drivers/of/irq.c of_irq_init() 扫描设备树,匹配中断控制器节点
tick_init(); //时钟事件(滴答)初始化: 每滴答一下,产生一个时间周期事件,通知系统
rcu_init_nohz();
init_timers(); //初始化定时器
hrtimers_init();
softirq_init();
timekeeping_init();
time_init(); //时间初始化
sched_clock_postinit();
printk_safe_init();
perf_event_init();
profile_init();
call_function_init();
WARN(!irqs_disabled(), "Interrupts were enabled early\n");
early_boot_irqs_disabled = false;
local_irq_enable();
kmem_cache_init_late();
/*
* HACK ALERT! This is early. We're enabling the console before
* we've done PCI setups etc, and console_init() must be aware of
* this. But we do want output early, in case something goes wrong.
*/
console_init(); //控制台初始化
if (panic_later)
panic("Too many boot %s vars at `%s'", panic_later,
panic_param);
lockdep_info();
/*
* Need to run this when irqs are enabled, because it wants
* to self-test [hard/soft]-irqs on/off lock inversion bugs
* too:
*/
locking_selftest();
#ifdef CONFIG_BLK_DEV_INITRD
if (initrd_start && !initrd_below_start_ok &&
page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
page_to_pfn(virt_to_page((void *)initrd_start)),
min_low_pfn);
initrd_start = 0;
}
#endif
page_ext_init();
debug_objects_mem_init();
kmemleak_init();
setup_per_cpu_pageset();
numa_policy_init();
if (late_time_init)
late_time_init();
sched_clock_init();
calibrate_delay();
pidmap_init();
anon_vma_init();
acpi_early_init();
#ifdef CONFIG_X86
if (efi_enabled(EFI_RUNTIME_SERVICES))
efi_enter_virtual_mode();
#endif
#ifdef CONFIG_X86_ESPFIX64
/* Should be run before the first non-init thread is created */
init_espfix_bsp();
#endif
thread_stack_cache_init();
cred_init();
fork_init(); //进程初始化
proc_caches_init();
buffer_init();
key_init();
security_init();
dbg_late_init();
vfs_caches_init();
signals_init(); //信号初始化
/* rootfs populating might need page-writeback */
page_writeback_init();
proc_root_init();
nsfs_init();
cpuset_init();
cgroup_init();
taskstats_init_early();
delayacct_init();
check_bugs();
acpi_subsystem_init();
sfi_init_late();
if (efi_enabled(EFI_RUNTIME_SERVICES)) {
efi_late_init();
efi_free_boot_services();
}
ftrace_init();
/* Do the rest non-__init'ed, we're now alive */
rest_init(); //创建初始进程
//各驱动模块的初始化: -> kernel_init-> kernel_init_freeable -> do_basic_setup -> do_initcalls()
prevent_tail_call_optimization();
}
1.体系架构设置
//setup_arch: 设置CPU体系架构 和设备树
//arch/arm64/kernel/setup.c
void __init setup_arch(char **cmdline_p)
{
pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
sprintf(init_utsname()->machine, UTS_MACHINE);
init_mm.start_code = (unsigned long) _text;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = (unsigned long) _end;
*cmdline_p = boot_command_line;
early_fixmap_init();
early_ioremap_init();
setup_machine_fdt(__fdt_pointer);//设置 机器设备树
parse_early_param();
/*
* Unmask asynchronous aborts after bringing up possible earlycon.
* (Report possible System Errors once we can report this occurred)
*/
local_async_enable();
/*
* TTBR0 is only used for the identity mapping at this stage. Make it
* point to zero page to avoid speculatively fetching new entries.
*/
cpu_uninstall_idmap();
xen_early_init();
efi_init();
arm64_memblock_init();
paging_init();
acpi_table_upgrade();
/* Parse the ACPI tables for possible boot-time configuration */
acpi_boot_table_init();
if (acpi_disabled)
unflatten_device_tree(); //解析设备树,存在全局变量of_root中,通过它可遍历整个设备树
bootmem_init();
kasan_init();
request_standard_resources();
early_ioremap_reset();
if (acpi_disabled)
psci_dt_init();
else
psci_acpi_init();
cpu_read_bootcpu_ops();
smp_init_cpus();
smp_build_mpidr_hash();
}
//设备树的物理地址dt_phys由u-boot通过寄存器x0传入(head.S中先保存到x21,再写入__fdt_pointer)
static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
void *dt_virt = fixmap_remap_fdt(dt_phys);
if (!dt_virt || !early_init_dt_scan(dt_virt)) { //检查设备树 幻数匹配否
pr_crit("\n"
"Error: invalid device tree blob at physical address %pa (virtual address 0x%p)\n"
"The dtb must be 8-byte aligned and must not exceed 2 MB in size\n"
"\nPlease check your bootloader.",
&dt_phys, dt_virt);
while (true)
cpu_relax();
}
//获取机器 名字(单板)
dump_stack_set_arch_desc("%s (DT)", of_flat_dt_get_machine_name());
}
2.剩余的初始化
//rest_init: 余下的初始化
static noinline void __ref rest_init(void)
{
kernel_thread(kernel_init, NULL, CLONE_FS); //创建 1号进程(线程/任务): init进程(kernel_init)
pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); //2号进程: 内核守护进程(kthreadd)
//0号进程(当前任务): 空闲进程(即有任务时让出CPU,没任务时idle空转待命)
cpu_startup_entry(CPUHP_ONLINE); // cpu_idle_loop() kernel/sched/idle.c
// $ ps -aux 可查看 当前运行的所有进程
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
root 1 0.0 0.1 161068 5424 ? Ss 09:56 0:12 /sbin/init 2 //init进程
root 2 0.0 0.0 0 0 ? S 09:56 0:05 [kthreadd] //内核守护进程
// 0号进程 看不到,因它是内核进程,应用空间看不到
}
static int __ref kernel_init(void *unused)
{
int ret;
kernel_init_freeable();
/* need to finish all async __init code before freeing the memory */
async_synchronize_full();
free_initmem();
mark_readonly();
system_state = SYSTEM_RUNNING;
numa_default_policy();
rcu_end_inkernel_boot();
if (ramdisk_execute_command) {
ret = run_init_process(ramdisk_execute_command);
if (!ret)
return 0;
pr_err("Failed to execute %s (error %d)\n",
ramdisk_execute_command, ret);
}
/*
* We try each of these until one succeeds.
*
* The Bourne shell can be used instead of init if we are
* trying to recover a really broken machine.
*/
/* 如果有启动参数做为执行命令,就执行
如无则 第一备用:/sbin/init,第二备用:/etc/init,第三备用:/bin/init,第四备用:/bin/sh。
*/
if (execute_command) {
ret = run_init_process(execute_command);/*execute_command是由u-boot里传入的参数bootargs中init来指定的,
如 setenv bootargs root=nfs nfsroot=192.168.9.120:/nfs/rootfs console=ttySAC2,115200 init=/linuxrc ip=192.168.9.9
这里init 是linuxrc (由busybox编译生成) ,
linuxrc主要是负责解析/etc下配置文件 如启动脚本/etc/init.d/rcS
*/
if (!ret)
return 0;
panic("Requested init %s failed (error %d).",
execute_command, ret);
}
//从ps -aux知 运行init进程是/sbin/init
if (!try_to_run_init_process("/sbin/init") || // ->run_init_process -> do_execve() 从内核态切换到应用态(单向的)
!try_to_run_init_process("/etc/init") ||
!try_to_run_init_process("/bin/init") ||
!try_to_run_init_process("/bin/sh"))
return 0;
panic("No working init found. Try passing init= option to kernel. "
"See Linux Documentation/init.txt for guidance.");
}
static noinline void __init kernel_init_freeable(void)
{
do_basic_setup(); //基本设置(如 设备驱动初始化)
//在体系结构相关设置已完成后进行
/* Open the /dev/console on the rootfs, this should never fail */
if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) //打开输入输出终端
pr_err("Warning: unable to open an initial console.\n");
load_default_modules();
}
/*
* Ok, the machine is now initialized. None of the devices
* have been touched yet, but the CPU subsystem is up and
* running, and memory and process management works.
*
* Now we can finally start doing some real work..
*/
static void __init do_basic_setup(void) //基本设置(如 设备驱动初始化)
{
cpuset_init_smp();
shmem_init();
driver_init(); //驱动 初始化 -> drivers/base/init.c
init_irq_proc();
do_ctors();
usermodehelper_enable();
do_initcalls(); //各驱动模块初始化
}
3.驱动初始化
//driver_init: 驱动初始化(如/dev/ /sys/devices 文件系统)
/**
* driver_init - initialize driver model.
*
* Call the driver model init functions to initialize their
* subsystems. Called early from init/main.c.
*/
void __init driver_init(void)
{
/* These are the core pieces */
devtmpfs_init(); //设备文件系统初始化( /dev 所挂载的文件系统)
devices_init(); /*设备初始化(创建 /sys/dev /sys/devices )
->drivers/base/core.c
int __init devices_init(void)
{
devices_kset = kset_create_and_add("devices", &device_uevent_ops, NULL);
if (!devices_kset)
return -ENOMEM;
dev_kobj = kobject_create_and_add("dev", NULL);
if (!dev_kobj)
goto dev_kobj_err;
sysfs_dev_block_kobj = kobject_create_and_add("block", dev_kobj);
sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj);
}
*/
buses_init(); /*总线初始化(创建 /sys/bus 和 /sys/devices/system)
drivers/base/bus.c (vim中 :set nonu 可取消行号显示)
int __init buses_init(void)
{
bus_kset = kset_create_and_add("bus", &bus_uevent_ops, NULL);
if (!bus_kset)
return -ENOMEM;
system_kset = kset_create_and_add("system", NULL, &devices_kset->kobj);
if (!system_kset)
return -ENOMEM;
return 0;
}
*/
classes_init(); //设备类的初始化(创建 /sys/class)
firmware_init(); //固件初始化(创建 /sys/firmware)
hypervisor_init();
/* These are also core pieces, but must come after the
* core core pieces.
*/
platform_bus_init(); /*平台设备总线初始化
drivers/base/platform.c
int __init platform_bus_init(void)
{
int error;
early_platform_cleanup();
error = device_register(&platform_bus);
if (error)
return error;
error = bus_register(&platform_bus_type);
if (error)
device_unregister(&platform_bus);
of_platform_register_reconfig_notifier();
return error;
}
struct bus_type platform_bus_type = {
.name = "platform",
.dev_groups = platform_dev_groups,
.match = platform_match,
.uevent = platform_uevent,
.pm = &platform_dev_pm_ops,
};
*/
cpu_dev_init();
memory_dev_init();
container_dev_init();
of_core_init(); //设备树初始化(创建 /sys/firmware/devicetree)
}
4.驱动模块初始化
动因 //维护灾难: 手动在内核init程序中,添加自己模块初始化,会带来维护灾难
// 而维护一个初始化函数数组,也是会眼花缭乱
办法 //在内核镜像文件中,自定义一个段,这个段里面专门用来存放这些初始化函数的地址,
//编译时自动添加到自定义段
//内核启动时,会从该段地址处取出函数指针,逐个执行。
//rest_init -> kernel_init-> kernel_init_freeable -> do_basic_setup -> do_initcalls()
static void __init do_initcalls(void)
{
int level;
//遍历:逐个调用各驱动模块初始化 -> 避免手动添加的灾难
for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++)
do_initcall_level(level);
}
5.声明驱动模块启动自动调用
//initcall使用实例
//从日志 切入
[ 0.207540] Tegra Revision: A02 SKU: 0x8f CPU Process: 0 SoC Process: 1/* 源码跟踪
vscode搜寻:
drivers目录 右键 -> 在文件夹下查找 Tegra Revision: ->
pr_info("Tegra Revision: %s SKU: 0x%x CPU Process: %d SoC Process: %d\n",
early_initcall(tegra_init_fuse);
在System.map搜:
/tegra_init_fuse ->
ffffff8009639ba8 t tegra_init_fuse ->
$ aarch64-linux-gnu-addr2line ffffff8009639ba8 -e vmlinux -f
tegra_init_fuse
/home/yhai/kernel-4.9/drivers/soc/tegra/fuse/fuse-tegra.c:330
ffffff8009823498 t __initcall_tegra_init_fuseearly -> 由来
源码跳转:
early_initcall(tegra_init_fuse) -> ctrl+] ->
#define early_initcall(fn) __define_initcall(fn, early) -> ctrl+] ->
#define __define_initcall(fn, id) \
static initcall_t __initcall_name(fn, id) __used \
__attribute__((__section__(".initcall" #id ".init"))) = fn; -> ctrl+] ->
#define __initcall_name(fn, id) __initcall_##fn##id ->
__initcall_tegra_init_fuseearly
*/
[ 0.207556] DTS File Name: /dvs/git/dirty/git-master_linux/kernel/kernel-4.9/arch/arm64/boot/dts/../../../../../../hardware/nvidia/platform/t210/porg/kernel-dts/tegra210-p3448-0000-p3449-0000-b00.dts
/*跟踪
在System.map搜:
__initcall_tegra_init_fuseearly的后面
__initcall_display_tegra_dts_infoearly -> /display_tegra_dts_info ->
ffffff800963ae34 t display_tegra_dts_info ->
$ aarch64-linux-gnu-addr2line ffffff800963ae34 -e vmlinux -f
display_tegra_dts_info
/home/yhai/kernel-4.9/drivers/soc/tegra/common.c:561
-> pr_info("DTS File Name: %s\n", dts_fname);
*/