跟踪分析Linux内核的启动过程
实验步骤:
- 使用实验楼的虚拟机打开 shell
- 输入下列命令
cd LinuxKernel/
qemu -kernel linux-3.18.6/arch/x86/boot/bzImage -initrd rootfs.img
- 用gdb跟踪调试Linux内核的启动
$ qemu -kernel linux-3.18.6/arch/x86/boot/bzImage -initrd rootfs.img -s -S
# 关于-s和-S选项的说明:
# 1. -S
# -S freeze CPU at startup (use 'c' to start execution)
# 2. -s
# -s shorthand for -gdb tcp::1234
# 若不想使用1234端口,则可以使用-gdb tcp::xxxx来取代-s选项
- 另开一个 shell 窗口
# 打开 GDB 调试器
$ gdb
# 在 GDB 中输入以下命令:
# 在gdb界面中target remote之前加载符号表
(gdb)file linux-3.18.6/vmlinux
# 建立gdb和gdbserver之间的连接,按c 让qemu上的Linux继续运行
(gdb)target remote:1234
# 断点的设置可以在target remote之前,也可以在之后
(gdb)break start_kernel
上图可以看到程序执行到start_kernel断点处,qemu窗口中的执行效果。
再在rest_init处为内核代码增加一个断点,执行效果如下:
代码分析
1. start_kernel( )函数
/*
 * start_kernel() - C entry point of kernel initialization.
 *
 * Calls the setup routines below in a fixed order.  Interrupts are turned
 * off with local_irq_disable() near the top and stay off until
 * local_irq_enable() is called after call_function_init().  The function
 * ends by handing control to rest_init().
 */
asmlinkage __visible void __init start_kernel(void)
{
	char *command_line;
	char *after_dashes;

	lockdep_init();
	set_task_stack_end_magic(&init_task);
	smp_setup_processor_id();
	debug_objects_early_init();
	boot_init_stack_canary();
	cgroup_init_early();

	local_irq_disable();
	early_boot_irqs_disabled = true;

	/*
	 * Interrupts are still disabled. Do necessary setups, then
	 * enable them
	 */
	boot_cpu_init();
	page_address_init();
	pr_notice("%s", linux_banner);
	setup_arch(&command_line);
	mm_init_cpumask(&init_mm);
	setup_command_line(command_line);
	setup_nr_cpu_ids();
	setup_per_cpu_areas();
	smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
	build_all_zonelists(NULL, NULL);
	page_alloc_init();

	pr_notice("Kernel command line: %s\n", boot_command_line);
	parse_early_param();
	after_dashes = parse_args("Booting kernel",
				  static_command_line, __start___param,
				  __stop___param - __start___param,
				  -1, -1, &unknown_bootoption);
	/* Only parse the remainder when parse_args() returned a usable pointer. */
	if (!IS_ERR_OR_NULL(after_dashes))
		parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
			   set_init_arg);

	jump_label_init();

	/*
	 * These use large bootmem allocations and must precede
	 * kmem_cache_init()
	 */
	setup_log_buf(0);
	pidhash_init();
	vfs_caches_init_early();
	sort_main_extable();
	trap_init();
	mm_init();

	/*
	 * Set up the scheduler prior starting any interrupts (such as the
	 * timer interrupt). Full topology setup happens at smp_init()
	 * time - but meanwhile we still have a functioning scheduler.
	 */
	sched_init();
	/*
	 * Disable preemption - early bootup scheduling is extremely
	 * fragile until we cpu_idle() for the first time.
	 */
	preempt_disable();
	if (WARN(!irqs_disabled(),
		 "Interrupts were enabled *very* early, fixing it\n"))
		local_irq_disable();
	idr_init_cache();
	rcu_init();
	context_tracking_init();
	radix_tree_init();
	/*
	 * init some links before init_ISA_irqs()
	 */
	early_irq_init();
	init_IRQ();
	tick_init();
	rcu_init_nohz();
	init_timers();
	hrtimers_init();
	softirq_init();
	timekeeping_init();
	time_init();
	sched_clock_postinit();
	perf_event_init();
	profile_init();
	call_function_init();
	WARN(!irqs_disabled(), "Interrupts were enabled early\n");
	early_boot_irqs_disabled = false;
	local_irq_enable();

	kmem_cache_init_late();

	/*
	 * HACK ALERT! This is early. We're enabling the console before
	 * we've done PCI setups etc, and console_init() must be aware of
	 * this. But we do want output early, in case something goes wrong.
	 */
	console_init();
	if (panic_later)
		panic("Too many boot %s vars at `%s'", panic_later,
		      panic_param);

	lockdep_info();

	/*
	 * Need to run this when irqs are enabled, because it wants
	 * to self-test [hard/soft]-irqs on/off lock inversion bugs
	 * too:
	 */
	locking_selftest();

#ifdef CONFIG_BLK_DEV_INITRD
	if (initrd_start && !initrd_below_start_ok &&
	    page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
		pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
			page_to_pfn(virt_to_page((void *)initrd_start)),
			min_low_pfn);
		initrd_start = 0;
	}
#endif
	page_cgroup_init();
	debug_objects_mem_init();
	kmemleak_init();
	setup_per_cpu_pageset();
	numa_policy_init();
	/* late_time_init is optional; call it only when it has been set. */
	if (late_time_init)
		late_time_init();
	sched_clock_init();
	calibrate_delay();
	pidmap_init();
	anon_vma_init();
	acpi_early_init();
#ifdef CONFIG_X86 /* x86 hardware-specific code, for boards that support EFI */
	if (efi_enabled(EFI_RUNTIME_SERVICES))
		efi_enter_virtual_mode();
#endif
#ifdef CONFIG_X86_ESPFIX64
	/* Should be run before the first non-init thread is created */
	init_espfix_bsp();
#endif
	thread_info_cache_init();
	cred_init();
	fork_init(totalram_pages);
	proc_caches_init();
	buffer_init();
	key_init();
	security_init();
	dbg_late_init();
	vfs_caches_init(totalram_pages);
	signals_init();
	/* rootfs populating might need page-writeback */
	page_writeback_init();
	proc_root_init();
	cgroup_init();
	cpuset_init();
	taskstats_init_early();
	delayacct_init();

	check_bugs();

	sfi_init_late();

	if (efi_enabled(EFI_RUNTIME_SERVICES)) {
		efi_late_init();
		efi_free_boot_services();
	}

	ftrace_init();

	/* Do the rest non-__init'ed, we're now alive */
	rest_init();
}
start_kernel( )函数的作用是完成内核中各个模块的初始化,相当于C程序中的main函数。在start_kernel( )函数被调用之前,内核代码是用汇编语言写的,负责完成最初的系统初始化工作,为C代码的运行设置环境。该函数中set_task_stack_end_magic(&init_task)里的init_task即手工创建并初始化的PCB,对应0号进程,也就是最终的idle进程;idle进程由系统在启动时自动建立,运行在内核态。当系统没有需要执行的进程时,就调度到idle进程。
2. init_task 变量
start_kernel() 函数几乎涉及到了内核的所有模块,如:trap_init()(中断向量的初始化)、mm_init()(内存管理的初始化)、sched_init()(调度模块的初始化)等,首先是init_task:
/* Descriptor of process 0: a task_struct object initialized with the INIT_TASK macro. */
struct task_struct init_task = INIT_TASK(init_task);
可以看出 init_task(0号进程)是 task_struct 类型,是进程描述符,使用宏INIT_TASK对其进行初始化。接下来就是对各种模块的初始化:
3. rest_init()函数
/*
 * rest_init() - final step of start_kernel().
 *
 * Creates two kernel threads with kernel_thread(): first one running
 * kernel_init, then one running kthreadd.  The kthreadd task is looked up
 * by its pid and stored in kthreadd_task, after which kthreadd_done is
 * completed.  The calling context then calls schedule_preempt_disabled()
 * and enters cpu_startup_entry(CPUHP_ONLINE).
 */
static noinline void __init_refok rest_init(void)
{
int pid;
rcu_scheduler_starting();
/*
* We need to spawn init first so that it obtains pid 1, however
* the init task will end up wanting to create kthreads, which, if
* we schedule it before we create kthreadd, will OOPS.
*/
kernel_thread(kernel_init, NULL, CLONE_FS);
numa_default_policy();
pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
/* The pid -> task lookup is done inside an RCU read-side section. */
rcu_read_lock();
kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
rcu_read_unlock();
complete(&kthreadd_done);
/*
* The boot idle thread must execute schedule()
* at least once to get things moving:
*/
init_idle_bootup_task(current); /* idle initialization */
schedule_preempt_disabled();
/* Call into cpu_idle with preempt disabled */
cpu_startup_entry(CPUHP_ONLINE);
}
通过rest_init()函数新建kernel_init和kthreadd两个内核线程,分别为系统的1号进程和2号进程。其中下面这行代码:
kernel_thread(kernel_init, NULL, CLONE_FS);
是调用 kernel_thread() 创建1号内核线程(从 kernel_init 函数开始执行):kernel_thread() 通过 fork 出一个新进程来执行 kernel_init 函数,而 init_task 是使用宏进行初始化的。也就是说,0号进程不是系统通过 kernel_thread 的方式(也就是 fork)创建的(init_task 是唯一一个没有通过 fork() 产生的进程)。
总结
start_kernel()函数是内核启动的起点,执行了各种初始化操作。本次实验整体进行得比较顺利,只是偶尔实验楼会卡顿;但是对内核启动过程的理解还是比较困难的:start_kernel()中的各个模块做了非常繁杂的初始化操作,内核才得以正常启动。本次实验结合《庖丁解牛》、对应的实验楼实验以及视频教程配合完成,使我对linux内核源码文件的目录结构、内核线程、用户进程、进程的调度以及内核的数据结构有了一定的了解。在学习的过程中,由于对许多指令和术语不够清楚,查资料时又会遇到许多新的问题,但总的来说基本都理解了。在今后的实验过程中,我也会通过一系列实验,更加深入地学习《庖丁解牛》课本知识,加强对linux内核工作原理的认识和理解。