android6.0 init进程启动前世
对应代码android6.0_r72,kernel对应linux3.18
前言
android系统的底层是Linux系统,所以android系统启动的开始就是Linux的启动,当然Linux系统的前期引导BootLoader这里就不做具体说明了。
linux内核启动 head.S
linux内核入口函数为 stext(linux/arch/arm64/kernel/head.S;下面的代码使用 x 寄存器和 EL1/EL2,属于 arm64 架构)
// Kernel entry point (arm64). Records the FDT pointer passed in x0,
// identifies the CPU, creates the initial page tables and then branches to
// the CPU-specific setup routine with lr pointing at __enable_mmu.
ENTRY(stext)
mov x21, x0 // x21=FDT
bl el2_setup // Drop to EL1, w20=cpu_boot_mode
bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET
bl set_cpu_boot_mode_flag
mrs x22, midr_el1 // x22=cpuid
mov x0, x22
bl lookup_processor_type
mov x23, x0 // x23=current cpu_table
cbz x23, __error_p // invalid processor (x23=0)?
bl __vet_fdt
// Create the page tables
bl __create_page_tables // x25=TTBR0, x26=TTBR1
/*
* The following calls CPU specific code in a position independent
* manner. See arch/arm64/mm/proc.S for details. x23 = base of
* cpu_info structure selected by lookup_processor_type above.
* On return, the CPU will be ready for the MMU to be turned on and
* the TCR will have been set.
*/
// x27 receives the first entry stored at __switch_data
ldr x27, __switch_data // address to jump to after
// MMU has been enabled
// The CPU setup routine branched to below returns through lr, i.e. into
// __enable_mmu
adr lr, __enable_mmu // return (PIC) address
ldr x12, [x23, #CPU_INFO_SETUP]
add x12, x12, x28 // __virt_to_phys
br x12 // initialise processor
ENDPROC(stext)
stext代码段:
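将 __switch_data 处存放的第一项(即 __mmap_switched 的地址)加载到寄存器 x27,作为MMU使能后的跳转地址: ldr x27, __switch_data
然后把返回地址 lr 设为 __enable_mmu;CPU 初始化例程返回后进入 __enable_mmu 使能MMU,随后跳转到 x27 中保存的 __mmap_switched: adr lr, __enable_mmu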
// Table of addresses read by stext and __mmap_switched. stext loads the
// first entry into x27; __mmap_switched starts reading at the second entry.
// The trailing comments name the register each entry is loaded into.
__switch_data:
.quad __mmap_switched
.quad __bss_start // x6
.quad __bss_stop // x7
.quad processor_id // x4
.quad __fdt_pointer // x5
.quad memstart_addr // x6
.quad init_thread_union + THREAD_START_SP // sp
继续走到 __mmap_switched 代码段
// Clears the BSS, loads the initial stack pointer, stores the boot-time
// values held in x22/x21/x24 into their variables, then branches to
// start_kernel. All addresses come from the __switch_data table.
__mmap_switched:
adr x3, __switch_data + 8 // skip the first entry of the table
ldp x6, x7, [x3], #16 // x6=__bss_start, x7=__bss_stop
1: cmp x6, x7
b.hs 2f
str xzr, [x6], #8 // Clear BSS
b 1b
2:
ldp x4, x5, [x3], #16 // x4=&processor_id, x5=&__fdt_pointer
ldr x6, [x3], #8 // x6=&memstart_addr
ldr x16, [x3] // x16=init_thread_union + THREAD_START_SP
mov sp, x16
// Store the processor ID, the FDT pointer and PHYS_OFFSET
str x22, [x4] // Save processor ID
str x21, [x5] // Save FDT pointer
str x24, [x6] // Save PHYS_OFFSET
mov x29, #0
// Branch to start_kernel (no link: this code is not returned to)
b start_kernel
ENDPROC(__mmap_switched)
从 start_kernel 函数开始,内核进入C语言部分,进行内核的大部分初始化工作。
start_kernel
函数所在位置:linux/init/main.c
start_kernel 初始化内容的详细说明可参考 https://www.cnblogs.com/lifexy/p/7366782.html
start_kernel 涉及大量初始化工作,其主要涉及以下5部分内容。
1.内核启动参数的获取和处理
2.setup_arch(&command_line);函数
3.内存管理的初始化(从bootmem到slab)
4.各种内核体系的初始化
5.rest_init();函数
/*
 * First C function of the kernel, reached from __mmap_switched.
 * Calls the subsystem initialisation routines one after another (interrupts
 * are disabled near the top and re-enabled after call_function_init()),
 * and finishes by calling rest_init().
 */
asmlinkage __visible void __init start_kernel(void)
{
char *command_line;
char *after_dashes;
/*
* Need to run as early as possible, to initialize the
* lockdep hash:
*/
lockdep_init();
set_task_stack_end_magic(&init_task);
smp_setup_processor_id();
debug_objects_early_init();
/*
* Set up the initial canary ASAP:
*/
boot_init_stack_canary();
cgroup_init_early();
local_irq_disable();
early_boot_irqs_disabled = true;
/*
* Interrupts are still disabled. Do necessary setups, then
* enable them
*/
boot_cpu_init();
page_address_init();
pr_notice("%s", linux_banner);
/* Architecture-specific setup; also hands back the command line */
setup_arch(&command_line);
mm_init_cpumask(&init_mm);
setup_command_line(command_line);
setup_nr_cpu_ids();
setup_per_cpu_areas();
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
build_all_zonelists(NULL, NULL);
page_alloc_init();
pr_notice("Kernel command line: %s\n", boot_command_line);
parse_early_param();
after_dashes = parse_args("Booting kernel",
static_command_line, __start___param,
__stop___param - __start___param,
-1, -1, &unknown_bootoption);
/* Whatever parse_args() returned as "after dashes" is parsed as init args */
if (!IS_ERR_OR_NULL(after_dashes))
parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
set_init_arg);
jump_label_init();
/*
* These use large bootmem allocations and must precede
* kmem_cache_init()
*/
setup_log_buf(0);
pidhash_init();
vfs_caches_init_early();
sort_main_extable();
trap_init();
mm_init();
/*
* Set up the scheduler prior starting any interrupts (such as the
* timer interrupt). Full topology setup happens at smp_init()
* time - but meanwhile we still have a functioning scheduler.
*/
sched_init();
/*
* Disable preemption - early bootup scheduling is extremely
* fragile until we cpu_idle() for the first time.
*/
preempt_disable();
if (WARN(!irqs_disabled(),
"Interrupts were enabled *very* early, fixing it\n"))
local_irq_disable();
idr_init_cache();
rcu_init();
/* trace_printk() and trace points may be used after this */
trace_init();
context_tracking_init();
radix_tree_init();
/* init some links before init_ISA_irqs() */
early_irq_init();
init_IRQ();
tick_init();
rcu_init_nohz();
init_timers();
hrtimers_init();
softirq_init();
timekeeping_init();
time_init();
sched_clock_postinit();
perf_event_init();
profile_init();
call_function_init();
WARN(!irqs_disabled(), "Interrupts were enabled early\n");
/* From here on interrupts are enabled */
early_boot_irqs_disabled = false;
local_irq_enable();
kmem_cache_init_late();
/*
* HACK ALERT! This is early. We're enabling the console before
* we've done PCI setups etc, and console_init() must be aware of
* this. But we do want output early, in case something goes wrong.
*/
console_init();
if (panic_later)
panic("Too many boot %s vars at `%s'", panic_later,
panic_param);
lockdep_info();
/*
* Need to run this when irqs are enabled, because it wants
* to self-test [hard/soft]-irqs on/off lock inversion bugs
* too:
*/
locking_selftest();
#ifdef CONFIG_BLK_DEV_INITRD
/* Disable the initrd if its first page lies below min_low_pfn */
if (initrd_start && !initrd_below_start_ok &&
page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
page_to_pfn(virt_to_page((void *)initrd_start)),
min_low_pfn);
initrd_start = 0;
}
#endif
page_cgroup_init();
page_ext_init();
debug_objects_mem_init();
kmemleak_init();
setup_per_cpu_pageset();
numa_policy_init();
if (late_time_init)
late_time_init();
sched_clock_init();
calibrate_delay();
pidmap_init();
anon_vma_init();
acpi_early_init();
#ifdef CONFIG_X86
if (efi_enabled(EFI_RUNTIME_SERVICES))
efi_enter_virtual_mode();
#endif
#ifdef CONFIG_X86_ESPFIX64
/* Should be run before the first non-init thread is created */
init_espfix_bsp();
#endif
thread_info_cache_init();
cred_init();
fork_init(totalram_pages);
proc_caches_init();
buffer_init();
key_init();
security_init();
dbg_late_init();
vfs_caches_init(totalram_pages);
signals_init();
/* rootfs populating might need page-writeback */
page_writeback_init();
proc_root_init();
cgroup_init();
cpuset_init();
taskstats_init_early();
delayacct_init();
check_bugs();
acpi_subsystem_init();
sfi_init_late();
if (efi_enabled(EFI_RUNTIME_SERVICES)) {
efi_late_init();
efi_free_boot_services();
}
ftrace_init();
/* Do the rest non-__init'ed, we're now alive */
/*
 * rest_init() spawns the kernel_init thread; that thread is the one that
 * eventually executes the user-space init program.
 */
rest_init();
}
此函数这里不做详细说明,下一步启动进入 rest_init() 函数进而启动用户空间进程init。
/*
 * Last step of start_kernel(): spawns the kernel_init and kthreadd threads,
 * publishes kthreadd_task, completes kthreadd_done, and then turns the
 * calling boot thread into the idle thread.
 */
static noinline void __init_refok rest_init(void)
{
int pid;
/* Start RCU scheduler support; RCU is used just below (rcu_read_lock) */
rcu_scheduler_starting();
/*
* We need to spawn init first so that it obtains pid 1, however
* the init task will end up wanting to create kthreads, which, if
* we schedule it before we create kthreadd, will OOPS.
*/
kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
/* Set the NUMA memory access policy to the default */
numa_default_policy();
/*
 * Create the kthreadd kernel thread, which creates other kernel threads
 * on request:
 *  - it loops forever in kthreadd(), taking entries off the global
 *    kthread_create_list and calling create_kthread() for each of them;
 *  - each entry is removed from the list before it is processed;
 *  - when the list is empty it calls schedule() to give up the CPU.
 * Entries are presumably queued by kthread_create() - not shown here.
 *
 * These two threads are the ones normally shown by ps on a Linux system:
 *   $ ps -A
 *   PID TTY TIME CMD
 *     1 ?   00:00:00 init
 *     2 ?   00:00:00 kthreadd
 */
pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
/*
 * Look up kthreadd's task_struct from its pid and publish it in
 * kthreadd_task, then complete kthreadd_done: kernel_init_freeable()
 * blocks on that completion before doing anything else.
 */
rcu_read_lock();
kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
rcu_read_unlock();
complete(&kthreadd_done);
/*
* The boot idle thread must execute schedule()
* at least once to get things moving:
*/
init_idle_bootup_task(current);
schedule_preempt_disabled();
/* Call into cpu_idle with preempt disabled */
cpu_idle();
}
1、rest_init() 首先通过 kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
创建 kernel_init 线程;kernel_init 会先在 kernel_init_freeable() 中通过 wait_for_completion(&kthreadd_done) 等待 kthreadd 创建完成,完成各项初始化之后再调用 async_synchronize_full(); 阻塞等待所有异步调用执行完成——在释放 init 内存前,必须完成所有的异步 __init 代码。
2、创建负责管理内核线程的 kthreadd 线程,kthreadd 函数运行一个无限 for 循环来处理 kthread_create_list 列表。
kernel_init
kernel_init 是通过 kernel_thread 函数调用 do_fork 函数最终创建的,如下:
/*
 * Thin wrapper around do_fork() used to start a kernel thread.
 *
 * @fn:    thread entry function, handed to do_fork() as an unsigned long
 * @arg:   argument for @fn, likewise handed over as an unsigned long
 * @flags: caller-supplied clone flags; CLONE_VM and CLONE_UNTRACED are
 *         always added on top of them
 *
 * Returns whatever do_fork() returns.
 */
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
	unsigned long clone_flags = flags | CLONE_VM | CLONE_UNTRACED;
	unsigned long entry = (unsigned long)fn;
	unsigned long entry_arg = (unsigned long)arg;

	return do_fork(clone_flags, entry, entry_arg, NULL, NULL);
}
kernel_init 函数将完成设备驱动程序的初始化,并最终启动用户空间的 init 进程。
部分书籍介绍的内核启动流程基于经典的2.6版本,其中 kernel_init 函数还会调用 init_post 函数专门负责 init 进程的启动;现版本中 init_post 的工作已经被整合进了 kernel_init 函数。
/*
 * Body of the thread spawned first by rest_init().
 * Finishes kernel initialisation, frees the init memory, then tries to
 * execute a user-space init program: ramdisk_execute_command first, then
 * execute_command, then a fixed list of fallback paths.
 * Returns 0 as soon as one of them has been started; panics if none can be.
 */
static int __ref kernel_init(void *unused)
{
int ret;
/* Remaining initialisation work that must happen before init is run */
kernel_init_freeable();
/* need to finish all async __init code before freeing the memory */
async_synchronize_full();
/* Free the memory held by the init.* sections */
free_initmem();
/* NOTE(review): the article states this is an empty implementation on arm64 - not shown here */
mark_readonly();
/* Mark the system as running */
system_state = SYSTEM_RUNNING;
/* Set the NUMA memory access policy to the default */
numa_default_policy();
/* Release all delayed struct file objects */
flush_delayed_fput();
/*
 * ramdisk_execute_command is set up in kernel_init_freeable(): it
 * defaults to "/init" and is reset to NULL there when that path is not
 * accessible.
 */
if (ramdisk_execute_command) {
/* run_init_process() executes the program through do_execve() */
ret = run_init_process(ramdisk_execute_command);
if (!ret)
return 0;
pr_err("Failed to execute %s (error %d)\n",
ramdisk_execute_command, ret);
}
/* If execute_command is set, try to start that program next */
if (execute_command) {
ret = run_init_process(execute_command);
if (!ret)
return 0;
pr_err("Failed to execute %s (error %d). Attempting defaults...\n",
execute_command, ret);
}
/*
 * Neither ramdisk_execute_command nor execute_command could be started:
 * fall back to /sbin/init, /etc/init, /bin/init and /bin/sh, in that
 * order, stopping at the first one that starts.
 */
if (!try_to_run_init_process("/sbin/init") ||
!try_to_run_init_process("/etc/init") ||
!try_to_run_init_process("/bin/init") ||
!try_to_run_init_process("/bin/sh"))
return 0;
panic("No working init found. Try passing init= option to kernel. "
"See Linux Documentation/init.txt for guidance.");
}
ramdisk_execute_command 的值为"/init",在 kernel_init_freeable() 初始化时赋值的
/*
 * First stage of kernel_init(): waits until rest_init() has completed
 * kthreadd_done, lifts the boot-time restrictions on the current task,
 * runs the SMP and basic setup calls, opens /dev/console and decides which
 * early user-space init program (ramdisk_execute_command) will be used.
 */
static noinline void __init kernel_init_freeable(void)
{
/*
* Wait until kthreadd is all set-up.
*/
wait_for_completion(&kthreadd_done);
/* Now the scheduler is fully set up and can do blocking allocations */
gfp_allowed_mask = __GFP_BITS_MASK;
/*
* init can allocate pages on any node
*/
set_mems_allowed(node_states[N_MEMORY]);
/*
* init can run on any cpu.
*/
set_cpus_allowed_ptr(current, cpu_all_mask);
cad_pid = task_pid(current);
smp_prepare_cpus(setup_max_cpus);
do_pre_smp_initcalls();
lockup_detector_init();
smp_init();
sched_init_smp();
do_basic_setup();
/* Open the /dev/console on the rootfs, this should never fail */
if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
pr_err("Warning: unable to open an initial console.\n");
/*
 * Duplicate descriptor 0 twice; presumably this yields descriptors 1 and
 * 2 so the console serves as stdin, stdout and stderr - TODO confirm.
 */
(void) sys_dup(0);
(void) sys_dup(0);
#ifdef CONFIG_MTK_HIBERNATION
/* IPO-H, move here for console ok after hibernaton resume */
software_resume();
#endif
/*
* check if there is an early userspace init. If yes, let it do all
* the work
*/
/* Default to "/init" when no command has been set */
if (!ramdisk_execute_command)
ramdisk_execute_command = "/init";
/*
 * If that path is not accessible, clear the command and call
 * prepare_namespace() instead.
 */
if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {
ramdisk_execute_command = NULL;
prepare_namespace();
}
/*
* Ok, we have completed the initial bootup, and
* we're essentially up and running. Get rid of the
* initmem segments and start the user-mode stuff..
*/
/* rootfs is available now, try loading default modules */
load_default_modules();
}
因此 kernel_init 函数在 if (ramdisk_execute_command) 判断 ramdisk_execute_command 时,ramdisk_execute_command 默认已经被初始化为了 "/init"(若该文件不可访问,则会在 kernel_init_freeable() 中被置为 NULL);此时 kernel_init 函数将执行:
ret = run_init_process(ramdisk_execute_command); 执行成功(返回值为 0)后返回。
static int run_init_process(const char *init_filename)
{
argv_init[0] = init_filename;
// 去执行 init_filename 程序,也就是 /init 程序, 之后进入 /init 的 main() 函数。
return do_execve(getname_kernel(init_filename),
(const char __user *const __user *)argv_init,
(const char __user *const __user *)envp_init);
}
接下来的启动将执行 /init 程序, 也就进入了 /init 的 main() 函数;这便是 android 系统启动的第一个进程 init 进程了。
以下为补充:
kthreadd
kthreadd ,此函数运行一个无限 for 循环来管理 kthread_create_list 列表:
/*
 * Body of the kthreadd thread spawned by rest_init().
 * Loops forever: when kthread_create_list is empty it calls schedule();
 * otherwise it removes the queued kthread_create_info entries one by one
 * and calls create_kthread() for each of them.
 * @unused is not read.
 */
int kthreadd(void *unused)
{
struct task_struct *tsk = current;
/* Setup a clean context for our children to inherit. */
set_task_comm(tsk, "kthreadd");
ignore_signals(tsk);
set_cpus_allowed_ptr(tsk, cpu_all_mask);
set_mems_allowed(node_states[N_MEMORY]);
current->flags |= PF_NOFREEZE;
for (;;) {
/*
 * The task state is changed before the list is checked, and schedule()
 * is only called when no request is queued.
 */
set_current_state(TASK_INTERRUPTIBLE);
if (list_empty(&kthread_create_list))
schedule();
__set_current_state(TASK_RUNNING);
spin_lock(&kthread_create_lock);
while (!list_empty(&kthread_create_list)) {
struct kthread_create_info *create;
/* Take the first queued request off the list */
create = list_entry(kthread_create_list.next,
struct kthread_create_info, list);
list_del_init(&create->list);
/* create_kthread() is called with kthread_create_lock released */
spin_unlock(&kthread_create_lock);
create_kthread(create);
/* Re-take the lock before looking at the list again */
spin_lock(&kthread_create_lock);
}
spin_unlock(&kthread_create_lock);
}
/* Not reached: the loop above has no exit */
return 0;
}