本文基于Linux 3.18源码,对内核启动流程做简要的学习记录。
Linux内核的启动流程如下(除driver_init位于drivers/base/init.c外,其余函数均在init/main.c中):
start_kernel->rest_init->kernel_init->kernel_init_freeable->do_basic_setup->driver_init
1、boot 引导系统启动(主要汇编部分)
2、start_kernel
/*
 * start_kernel - run the one-time kernel initialisation sequence.
 *
 * Calls the initialisation routines below strictly in the order written and
 * ends by calling rest_init(). Interrupts are disabled with
 * local_irq_disable() near the top and re-enabled with local_irq_enable()
 * after call_function_init(); early_boot_irqs_disabled is true for exactly
 * that window.
 *
 * NOTE(review): the relative order of these calls is load-bearing (several
 * of the comments below state ordering constraints); do not reorder.
 */
asmlinkage __visible void __init start_kernel(void)
{
/* Filled in by setup_arch() and handed to setup_command_line(). */
char *command_line;
/* Result of the first parse_args() pass; see the second pass below. */
char *after_dashes;
/*
* Need to run as early as possible, to initialize the
* lockdep hash:
*/
lockdep_init();
set_task_stack_end_magic(&init_task);
smp_setup_processor_id();
debug_objects_early_init();
cgroup_init_early();
local_irq_disable();
early_boot_irqs_disabled = true;
/*
* Interrupts are still disabled. Do necessary setups, then
* enable them
*/
boot_cpu_init();
page_address_init();
pr_notice("%s", linux_banner);
setup_arch(&command_line);
/*
* Set up the initial canary ASAP:
*/
boot_init_stack_canary();
mm_init_cpumask(&init_mm);
setup_command_line(command_line);
setup_nr_cpu_ids();
setup_per_cpu_areas();
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
build_all_zonelists(NULL, NULL);
page_alloc_init();
pr_notice("Kernel command line: %s\n", boot_command_line);
parse_early_param();
/*
 * First pass over static_command_line against the __start___param ..
 * __stop___param table; unknown_bootoption is the fallback handler.
 * If the call hands back a usable pointer (not an error, not NULL), that
 * remainder is parsed a second time with set_init_arg as the handler.
 */
after_dashes = parse_args("Booting kernel",
static_command_line, __start___param,
__stop___param - __start___param,
-1, -1, NULL, &unknown_bootoption);
if (!IS_ERR_OR_NULL(after_dashes))
parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
NULL, set_init_arg);
jump_label_init();
/*
* These use large bootmem allocations and must precede
* kmem_cache_init()
*/
setup_log_buf(0);
pidhash_init();
vfs_caches_init_early();
sort_main_extable();
trap_init();
mm_init();
/*
* Set up the scheduler prior starting any interrupts (such as the
* timer interrupt). Full topology setup happens at smp_init()
* time - but meanwhile we still have a functioning scheduler.
*/
sched_init();
/*
* Disable preemption - early bootup scheduling is extremely
* fragile until we cpu_idle() for the first time.
*/
preempt_disable();
/*
 * The value of WARN() is used as the condition: if interrupts are found
 * enabled here, a warning is logged and they are disabled again.
 */
if (WARN(!irqs_disabled(),
"Interrupts were enabled *very* early, fixing it\n"))
local_irq_disable();
idr_init_cache();
rcu_init();
/* trace_printk() and trace points may be used after this */
trace_init();
context_tracking_init();
radix_tree_init();
/* init some links before init_ISA_irqs() */
early_irq_init();
init_IRQ();
tick_init();
rcu_init_nohz();
init_timers();
hrtimers_init();
softirq_init();
timekeeping_init();
time_init();
sched_clock_postinit();
perf_event_init();
profile_init();
call_function_init();
/* End of the interrupts-off window opened at the top of the function. */
WARN(!irqs_disabled(), "Interrupts were enabled early\n");
early_boot_irqs_disabled = false;
local_irq_enable();
kmem_cache_init_late();
/*
* HACK ALERT! This is early. We're enabling the console before
* we've done PCI setups etc, and console_init() must be aware of
* this. But we do want output early, in case something goes wrong.
*/
console_init();
/*
 * A panic recorded in panic_later is only raised here, after
 * console_init() - presumably so the message can be seen; TODO confirm.
 */
if (panic_later)
panic("Too many boot %s vars at `%s'", panic_later,
panic_param);
lockdep_info();
/*
* Need to run this when irqs are enabled, because it wants
* to self-test [hard/soft]-irqs on/off lock inversion bugs
* too:
*/
locking_selftest();
#ifdef CONFIG_BLK_DEV_INITRD
/*
 * If an initrd is present, initrd_below_start_ok is not set, and the page
 * frame of initrd_start lies below min_low_pfn, report it and disable the
 * initrd by zeroing initrd_start.
 */
if (initrd_start && !initrd_below_start_ok &&
page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
page_to_pfn(virt_to_page((void *)initrd_start)),
min_low_pfn);
initrd_start = 0;
}
#endif
page_cgroup_init();
debug_objects_mem_init();
kmemleak_init();
setup_per_cpu_pageset();
numa_policy_init();
/* late_time_init is an optional hook; it is called only when non-NULL. */
if (late_time_init)
late_time_init();
sched_clock_init();
calibrate_delay();
pidmap_init();
anon_vma_init();
acpi_early_init();
#ifdef CONFIG_X86
if (efi_enabled(EFI_RUNTIME_SERVICES))
efi_enter_virtual_mode();
#endif
#ifdef CONFIG_X86_ESPFIX64
/* Should be run before the first non-init thread is created */
init_espfix_bsp();
#endif
thread_info_cache_init();
cred_init();
fork_init(totalram_pages);
proc_caches_init();
buffer_init();
key_init();
security_init();
dbg_late_init();
vfs_caches_init(totalram_pages);
signals_init();
/* rootfs populating might need page-writeback */
page_writeback_init();
proc_root_init();
cgroup_init();
cpuset_init();
taskstats_init_early();
delayacct_init();
check_bugs();
acpi_subsystem_init();
sfi_init_late();
/*
 * NOTE(review): unlike the efi_enabled() check above, this one is not
 * wrapped in #ifdef CONFIG_X86.
 */
if (efi_enabled(EFI_RUNTIME_SERVICES)) {
efi_late_init();
efi_free_boot_services();
}
ftrace_init();
/* Do the rest non-__init'ed, we're now alive */
rest_init();
}
3、rest_init 启动kernel_init
/*
 * rest_init - final step of start_kernel().
 *
 * Spawns two kernel threads with kernel_thread(): first kernel_init, then
 * kthreadd. The task_struct of kthreadd is looked up by pid and stored in
 * kthreadd_task, after which kthreadd_done is completed (kernel_init_freeable()
 * waits on that completion). The calling context then schedules once and
 * enters cpu_startup_entry(CPUHP_ONLINE).
 */
static noinline void __init_refok rest_init(void)
{
/* pid returned for the kthreadd thread; used for the lookup below. */
int pid;
rcu_scheduler_starting();
smpboot_thread_init();
/*
* We need to spawn init first so that it obtains pid 1, however
* the init task will end up wanting to create kthreads, which, if
* we schedule it before we create kthreadd, will OOPS.
*/
/* The return value of this first kernel_thread() call is not used. */
kernel_thread(kernel_init, NULL, CLONE_FS);
numa_default_policy();
pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
/* The pid -> task lookup is done inside an RCU read-side section. */
rcu_read_lock();
kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
rcu_read_unlock();
/* Release anyone waiting on kthreadd_done. */
complete(&kthreadd_done);
/*
* The boot idle thread must execute schedule()
* at least once to get things moving:
*/
init_idle_bootup_task(current);
schedule_preempt_disabled();
/* Call into cpu_idle with preempt disabled */
cpu_startup_entry(CPUHP_ONLINE);
}
4、kernel_init
/*
 * kernel_init - thread function started by rest_init() through
 * kernel_thread(kernel_init, NULL, CLONE_FS).
 *
 * Finishes the init-only part of boot (kernel_init_freeable(), waiting for
 * async work, free_initmem(), mark_readonly()), sets system_state to
 * SYSTEM_RUNNING and then tries to start an init program, in this order:
 * ramdisk_execute_command, execute_command, "/sbin/init", "/etc/init",
 * "/bin/init", "/bin/sh". With CUST_FF defined, a firmware partition mount
 * is attempted before the built-in fallbacks.
 *
 * @unused: not referenced.
 *
 * Return: 0 as soon as one of the candidates was started successfully.
 * If none could be started the function panics.
 */
static int __ref kernel_init(void *unused)
{
	int ret;
#ifdef CUST_FF
	char *boot_cmdline = saved_command_line;
#endif

	kernel_init_freeable();

	/* All async __init code has to be finished before its memory is freed. */
	async_synchronize_full();
	free_initmem();
	mark_readonly();
	system_state = SYSTEM_RUNNING;
	numa_default_policy();

	flush_delayed_fput();

	/* Candidate 1: the early-userspace init, when one was selected. */
	if (ramdisk_execute_command) {
		ret = run_init_process(ramdisk_execute_command);
		if (ret == 0)
			return 0;
		pr_err("Failed to execute %s (error %d)\n",
		       ramdisk_execute_command, ret);
	}

	/*
	 * From here on each candidate is tried until one succeeds.
	 *
	 * The Bourne shell at the end of the list allows recovering a really
	 * broken machine when no init can be started.
	 */
	if (execute_command) {
		ret = run_init_process(execute_command);
		if (ret == 0)
			return 0;
		pr_err("Failed to execute %s (error %d). Attempting defaults...\n",
		       execute_command, ret);
	}

#ifdef CUST_FF
	pr_notice("Kernel command line: %s\n", boot_cmdline);
	if (!strstr(boot_cmdline, "root=ubi0:rootfs")) {
		pr_notice("[stanely]:not found system ubi. pls check your partition config \n");
	} else {
		extern void *subsystem_get_with_fwname(const char *name, const char *fw_name);

		/*
		 * Try the volume name first; the device node is attempted only
		 * when that mount returned non-zero.
		 */
		if (sys_mount("ubi1:modem", "/firmware", "ubifs", MS_RDONLY, "bulk_read") == 0 ||
		    sys_mount("/dev/ubi1_0", "/firmware", "ubifs", MS_RDONLY, "bulk_read") == 0) {
			printk("mount success \n");
			subsystem_get_with_fwname("modem", "modem");
		} else {
			printk("mount failed \n");
		}
	}
#endif

	/* Built-in fallbacks, tried in order; the first success ends boot. */
	if (!try_to_run_init_process("/sbin/init"))
		return 0;
	if (!try_to_run_init_process("/etc/init"))
		return 0;
	if (!try_to_run_init_process("/bin/init"))
		return 0;
	if (!try_to_run_init_process("/bin/sh"))
		return 0;

	panic("No working init found. Try passing init= option to kernel. "
	      "See Linux Documentation/init.txt for guidance.");
}
5、kernel_init_freeable
/*
 * kernel_init_freeable - first half of kernel_init(), placed in __init
 * memory.
 *
 * Blocks until kthreadd_done is completed (rest_init() completes it), lifts
 * the allocation / node / cpu restrictions on the current task, brings up
 * SMP, runs do_basic_setup(), opens /dev/console, and decides whether an
 * early-userspace init (ramdisk_execute_command) will be used.
 *
 * NOTE(review): statement order matters here; do not reorder.
 */
static noinline void __init kernel_init_freeable(void)
{
/*
* Wait until kthreadd is all set-up.
*/
wait_for_completion(&kthreadd_done);
/* Now the scheduler is fully set up and can do blocking allocations */
gfp_allowed_mask = __GFP_BITS_MASK;
/*
* init can allocate pages on any node
*/
set_mems_allowed(node_states[N_MEMORY]);
/*
* init can run on any cpu.
*/
set_cpus_allowed_ptr(current, cpu_all_mask);
cad_pid = task_pid(current);
smp_prepare_cpus(setup_max_cpus);
do_pre_smp_initcalls();
lockup_detector_init();
smp_init();
sched_init_smp();
do_basic_setup();
/* Open the /dev/console on the rootfs, this should never fail */
if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
pr_err("Warning: unable to open an initial console.\n");
/*
 * Duplicate descriptor 0 twice; results are deliberately ignored.
 * Presumably this provides descriptors 1 and 2 - TODO confirm.
 */
(void) sys_dup(0);
(void) sys_dup(0);
/*
* check if there is an early userspace init. If yes, let it do all
* the work
*/
/* Default to "/init" when no command was set. */
if (!ramdisk_execute_command)
ramdisk_execute_command = "/init";
/*
 * If that path is not accessible, drop it (kernel_init() then skips the
 * ramdisk candidate) and call prepare_namespace() instead.
 */
if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {
ramdisk_execute_command = NULL;
prepare_namespace();
}
/*
* Ok, we have completed the initial bootup, and
* we're essentially up and running. Get rid of the
* initmem segments and start the user-mode stuff..
*
* NOTE(review): the freeing itself is not done in this function; the
* caller kernel_init() calls free_initmem() after this returns.
*/
/* rootfs is available now, try loading default modules */
load_default_modules();
}
6、do_basic_setup 初始化
/*
 * do_basic_setup - called from kernel_init_freeable() right after
 * sched_init_smp().
 *
 * Runs the calls below in order; driver_init() sets up the driver-model
 * core and do_initcalls() then walks every initcall level.
 * usermodehelper_init() runs before driver_init(), while
 * usermodehelper_enable() is only called after do_ctors() and before
 * do_initcalls().
 */
static void __init do_basic_setup(void)
{
cpuset_init_smp();
usermodehelper_init();
shmem_init();
driver_init();
init_irq_proc();
do_ctors();
usermodehelper_enable();
do_initcalls();
random_int_secret_init();
}
7、driver_init函数完成设备驱动子系统的初始化
/*
 * driver_init - initialise the driver-model core, called from
 * do_basic_setup().
 *
 * The return values of the individual init functions are not checked here.
 */
void __init driver_init(void)
{
/* These are the core pieces */
devtmpfs_init();
/* Creates the "devices" kset and the "dev", "dev/block", "dev/char" kobjects. */
devices_init();
/* Creates the "bus" kset. */
buses_init();
/* Creates the "class" kset. */
classes_init();
/* Creates the "firmware" kobject. */
firmware_init();
/* Creates the "hypervisor" kobject. */
hypervisor_init();
/* Registers the platform_bus device and the platform_bus_type bus. */
platform_bus_init();
cpu_dev_init();
memory_dev_init();
container_dev_init();
}
8、devices_init函数:创建devices相关的设备模型
/*
 * devices_init - create the top-level device-model containers.
 *
 * Creates, in order: the "devices" kset (devices_kset), the "dev" kobject
 * (dev_kobj), and below dev_kobj the "block" and "char" kobjects
 * (sysfs_dev_block_kobj, sysfs_dev_char_kobj).
 *
 * Return: 0 on success. If any creation fails, everything created so far is
 * released in reverse order and -ENOMEM is returned.
 */
int __init devices_init(void)
{
	devices_kset = kset_create_and_add("devices", &device_uevent_ops, NULL);
	if (devices_kset == NULL)
		return -ENOMEM;

	dev_kobj = kobject_create_and_add("dev", NULL);
	if (dev_kobj == NULL)
		goto err_unregister_devices;

	sysfs_dev_block_kobj = kobject_create_and_add("block", dev_kobj);
	if (sysfs_dev_block_kobj == NULL)
		goto err_put_dev;

	sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj);
	if (sysfs_dev_char_kobj == NULL)
		goto err_put_block;

	return 0;

	/* Unwind in reverse order of creation. */
err_put_block:
	kobject_put(sysfs_dev_block_kobj);
err_put_dev:
	kobject_put(dev_kobj);
err_unregister_devices:
	kset_unregister(devices_kset);
	return -ENOMEM;
}
devices_init函数建立/sys/devices、/sys/dev这两个顶级容器节点和/sys/dev/block、/sys/dev/char这两个二级节点
devices_kset是在drivers/base/core.c中定义的全局变量,对应于sysfs的/sys/devices节点
dev_kobj是在drivers/base/core.c中定义的static变量,对应于sysfs的/sys/dev节点
sysfs_dev_block_kobj是在drivers/base/core.c中定义的全局变量,是dev_kobj的子节点,对应于sysfs的/sys/dev/block,该节点是所有block设备的父节点
sysfs_dev_char_kobj是在drivers/base/core.c中定义的全局变量,是dev_kobj的子节点,对应于sysfs的/sys/dev/char,该节点是所有char设备的父节点
9、buses_init函数:建立Linux设备模型总线部分的顶级节点
1 int __init buses_init(void) 2 { 3 bus_kset = kset_create_and_add("bus", &bus_uevent_ops, NULL); 4 if (!bus_kset) 5 return -ENOMEM; 6 return 0; 7 }
buses_init函数建立了/sys/bus这个顶级容器节点,该节点是Linux内核中所有总线类型的父节点,
bus_kset是drivers/base/bus.c中定义的static变量。
10、classes_init函数:建立Linux设备模型类部分的顶级容器节点
1 int __init classes_init(void) 2 { 3 class_kset = kset_create_and_add("class", NULL, NULL); 4 if (!class_kset) 5 return -ENOMEM; 6 return 0; 7 }
建立了/sys/class这个顶级容器节点,该节点是Linux内核中所有class类型的父节点,class_kset是drivers/base/class.c中定义的static变量
11、firmware_init函数:建立Linux设备模型中firmware部分的顶级节点
1 int __init firmware_init(void) 2 { 3 firmware_kobj = kobject_create_and_add("firmware", NULL); 4 if (!firmware_kobj) 5 return -ENOMEM; 6 return 0; 7 }
建立了/sys/firmware这个顶级kobj节点,firmware_kobj是在drivers/base/firmware.c定义的全局变量
12、hypervisor_init函数:建立hypervisor_kobj的顶级容器节点
1 int __init hypervisor_init(void) 2 { 3 hypervisor_kobj = kobject_create_and_add("hypervisor", NULL); 4 if (!hypervisor_kobj) 5 return -ENOMEM; 6 return 0; 7 }
建立了/sys/hypervisor这个顶级节点,hypervisor_kobj是在 drivers/base/hypervisor.c中定义的全局变量
前面几个函数执行完成后基本已经建立起Linux设备模型的框架了,接下来几个函数都是在前面框架中的扩展
13、platform_bus_init函数:
初始化Linux平台总线,平台总线(platform_bus_type)是在2.6 kernel中引入的一种虚拟总线,主要用来管理CPU的片上资源,具有较好的可移植性能,因此在2.6及以后的kernel中,很多驱动都已经用platform改写了
1 int __init platform_bus_init(void) 2 { 3 int error; 4 5 early_platform_cleanup(); 6 7 error = device_register(&platform_bus); 8 if (error) 9 return error; 10 error = bus_register(&platform_bus_type); 11 if (error) 12 device_unregister(&platform_bus); 13 return error; 14 }
platform_bus_init函数中引入两个变量platform_bus,platform_bus_type,均为在drivers/base/platform.c中定义的全局变量,如下:
1 struct device platform_bus = { 2 .init_name = "platform", 3 }; 4 5 struct bus_type platform_bus_type = { 6 .name = "platform", 7 .dev_attrs = platform_dev_attrs, 8 .match = platform_match, 9 .uevent = platform_uevent, 10 .pm = &platform_dev_pm_ops, 11 };
该函数先调用device_register函数注册platform_bus这个设备,这会在/sys/devices/节点下创建platform节点/sys/devices/platform,此设备节点是所有platform设备的父节点,即所有platform_device设备都会在/sys/devices/platform下创建子节点
然后调用bus_register函数注册platform_bus_type这个总线类型,这会在/sys/bus目录下创建一个platform节点,这个节点是所有platform设备和驱动的总线类型,即所有platform设备和驱动都会挂载到这个总线上;
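14、system_bus_init函数:在/sys/devices/下建立system容器节点(注意:上文第7节列出的driver_init并没有调用system_bus_init,本节以及后面cpu_dev_init、memory_dev_init中与sysdev相关的代码来自较早版本的内核,仅供参考)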
1 int __init system_bus_init(void) 2 { 3 system_kset = kset_create_and_add("system", NULL, &devices_kset->kobj); 4 if (!system_kset) 5 return -ENOMEM; 6 return 0; 7 }
通过kset_create_and_add函数建立了/sys/devices/system这个容器节点,system_kset是drivers/base/sys.c中定义的static变量。
这个节点下主要是cpu、中断控制器、时钟之类的设备
15、cpu_dev_init函数:建立一个名为"cpu"的类
1 int __init cpu_dev_init(void) 2 { 3 int err; 4 5 err = sysdev_class_register(&cpu_sysdev_class); 6 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 7 if (!err) 8 err = sched_create_sysfs_power_savings_entries(&cpu_sysdev_class); 9 #endif 10 11 return err; 12 }
调用sysdev_class_register函数注册cpu_sysdev_class这个类,cpu_sysdev_class是在drivers/base/cpu.c中定义的全局变量,如下:
1 struct sysdev_class cpu_sysdev_class = { 2 .name = "cpu", 3 .attrs = cpu_sysdev_class_attrs, 4 };
sysdev_class_register函数中将父节点设置为前面system_bus_init时创建的变量system_kset,即/sys/devices/system/节点;因此cpu_dev_init函数会在/sys/devices/system/节点下建立一个名为cpu的子节点/sys/devices/system/cpu/,该节点包含CPU相关的属性
16、memory_dev_init函数:建立memory设备在sysfs中的接口
1 /* 2 * Initialize the sysfs support for memory devices... 3 */ 4 int __init memory_dev_init(void) 5 { 6 unsigned int i; 7 int ret; 8 int err; 9 unsigned long block_sz; 10 11 memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops; 12 ret = sysdev_class_register(&memory_sysdev_class); 13 if (ret) 14 goto out; 15 16 block_sz = get_memory_block_size(); 17 sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; 18 19 /* 20 * Create entries for memory sections that were found 21 * during boot and have been initialized 22 */ 23 for (i = 0; i < NR_MEM_SECTIONS; i++) { 24 if (!present_section_nr(i)) 25 continue; 26 err = add_memory_section(0, __nr_to_section(i), MEM_ONLINE, 27 BOOT); 28 if (!ret) 29 ret = err; 30 } 31 32 err = memory_probe_init(); 33 if (!ret) 34 ret = err; 35 err = memory_fail_init(); 36 if (!ret) 37 ret = err; 38 err = block_size_init(); 39 if (!ret) 40 ret = err; 41 out: 42 if (ret) 43 printk(KERN_ERR "%s() failed: %d\n", __func__, ret); 44 return ret; 45 }
这边与cpu_dev_init函数类似,先调用sysdev_class_register函数注册memory_sysdev_class这个类,memory_sysdev_class是在drivers/base/memory.c中定义的static变量,如下:
1 #define MEMORY_CLASS_NAME "memory" 2 3 static struct sysdev_class memory_sysdev_class = { 4 .name = MEMORY_CLASS_NAME, 5 };
因此与cpu_dev_init函数类似,将memory_sysdev_class的父节点设置为前面system_bus_init时创建的变量system_kset,因此会在/sys/devices/system/节点下建立一个名为"memory"的子节点/sys/devices/system/memory/;该节点包含了内存相关的属性,如块大小等
17、do_initcalls函数:按initcall级别从低到高,依次调用内核中已选择(编译进内核)的各驱动和子系统的初始化函数
/*
 * do_initcall_level - run every initcall registered for one level.
 *
 * @level: index into initcall_levels[] and initcall_level_names[].
 *
 * A fresh copy of saved_command_line is placed in initcall_command_line and
 * parsed with parse_args() restricted to @level (passed as both min and max
 * level). Afterwards each function pointer from initcall_levels[level] up to,
 * but not including, initcall_levels[level + 1] is handed to
 * do_one_initcall().
 */
static void __init do_initcall_level(int level)
{
	initcall_t *call;

	/* parse_args() works on the copy, so refresh it for every level. */
	strcpy(initcall_command_line, saved_command_line);
	parse_args(initcall_level_names[level],
		   initcall_command_line, __start___param,
		   __stop___param - __start___param,
		   level, level,
		   NULL, &repair_env_string);

	/* The start of the next level marks the end of this one. */
	call = initcall_levels[level];
	while (call < initcall_levels[level + 1]) {
		do_one_initcall(*call);
		call++;
	}
}
/*
 * do_initcalls - run all initcall levels in ascending order.
 *
 * The last entry of initcall_levels[] is not a level of its own (it is only
 * used as the end marker of the previous level by do_initcall_level()),
 * hence the "- 1" in the bound.
 */
static void __init do_initcalls(void)
{
	int lvl = 0;

	while (lvl < ARRAY_SIZE(initcall_levels) - 1) {
		do_initcall_level(lvl);
		lvl++;
	}
}