android init进程
Init进程在Android系统中非常重要,它是Linux系统中用户空间的第一个进程。它的任务主要是做2件事情:
1.解析配置文件init.rc,然后启动系统各种native进程,例如非常重要的Zygote进程、SurfaceFlinger进程和media进程,这是它最主要的工作之一,也是最重要的工作,因为native进程靠它来启动。
2.维护一个属性服务property service,并且管理它。应用程序客户端可以通过set来设置属性,也可以通过get来获取属性,常用的setProperty方法和getProperty方法就是通过框架层链接到这个属性服务。当然也可以通过adb shell来获取属性和设置属性,例:在adb shell中通过getprop命令获得手机的所有属性,当需要设置属性的时候,可以使用setprop命令来操作。
在Init进程里除了上述的两个方面,它还做了一些其他的工作,下面就来看看具体工作。
看完流程图,我们从代码角度分析Init
// Entry point of init.
// When invoked under the name "ueventd" or "watchdogd" it dispatches to the
// corresponding *_main(). Otherwise it runs either the first stage (mount the
// early filesystems, initialize SELinux, re-exec itself with "--second-stage")
// or the second stage (property setup, init.rc parsing, queuing of the boot
// triggers) and then enters an endless epoll-driven loop.
int main(int argc, char** argv)
{
if (!strcmp(basename(argv[0]), "ueventd")) {
return ueventd_main(argc, argv);
}
if (!strcmp(basename(argv[0]), "watchdogd")) {
return watchdogd_main(argc, argv);
}
// Clear the umask so that files/directories created later get exactly the requested mode.
umask(0);
// Add the default PATH environment variable.
add_environment("PATH", _PATH_DEFPATH);
// First stage unless argv[1] is "--second-stage".
bool is_first_stage = (argc == 1) || (strcmp(argv[1], "--second-stage") != 0);
// First stage only: mount tmpfs on /dev, create /dev/pts and /dev/socket,
// then mount devpts, proc and sysfs.
if (is_first_stage) {
mount("tmpfs", "/dev", "tmpfs", MS_NOSUID, "mode=0755");
mkdir("/dev/pts", 0755);
mkdir("/dev/socket", 0755);
mount("devpts", "/dev/pts", "devpts", 0, NULL);
mount("proc", "/proc", "proc", 0, NULL);
mount("sysfs", "/sys", "sysfs", 0, NULL);
}
// We must have some place other than / to create the device nodes for
// kmsg and null, otherwise we won't be able to remount / read-only
// later on. Now that tmpfs is mounted on /dev, we can actually talk
// to the outside world.
open_devnull_stdio();
// Set up kernel logging (/dev/kmsg). The messages written there are init's own
// startup log; they can be read from adb shell, which is a good first check
// when the system fails to boot.
klog_init();
klog_set_level(KLOG_NOTICE_LEVEL);
NOTICE("init%s started!\n", is_first_stage ? "" : " second stage");
if (!is_first_stage) {
// Indicate that booting is in progress to background fw loaders, etc.
// Creates the empty file /dev/.booting (mode 0000) and closes it right away.
close(open("/dev/.booting", O_WRONLY | O_CREAT | O_CLOEXEC, 0000));
// Initialize the property service. The article treats this as one of init's
// two core jobs, the other being the parsing of init.rc.
property_init();
// If arguments are passed both on the command line and in DT,
// properties set in DT always have priority over the command-line ones.
process_kernel_dt();
// Parse the kernel boot parameters (kernel command line).
process_kernel_cmdline();
// Propagate the kernel variables to internal variables
// used by init as well as the current required properties.
export_kernel_boot_props();
}
// Set up SELinux, including loading the SELinux policy if we're in the kernel domain.
selinux_initialize(is_first_stage);
// If we're in the kernel domain, re-exec init to transition to the init domain now
// that the SELinux policy has been loaded.
if (is_first_stage) {
if (restorecon("/init") == -1) {
ERROR("restorecon failed: %s\n", strerror(errno));
security_failure();
}
char* path = argv[0];
char* args[] = { path, const_cast<char*>("--second-stage"), nullptr };
if (execv(path, args) == -1) {
ERROR("execv(\"%s\") failed: %s\n", path, strerror(errno));
security_failure();
}
}
// These directories were necessarily created before initial policy load
// and therefore need their security context restored to the proper value.
// This must happen before /dev is populated by ueventd.
INFO("Running restorecon...\n");
restorecon("/dev");
restorecon("/dev/socket");
restorecon("/dev/__properties__");
restorecon_recursive("/sys");
epoll_fd = epoll_create1(EPOLL_CLOEXEC);
if (epoll_fd == -1) {
ERROR("epoll_create1 failed: %s\n", strerror(errno));
exit(1);
}
// Install SIGCHLD handling for exiting child processes. When a child (for
// example zygote) exits, handle_signal() ends up being called and reaps it.
signal_handler_init();
// Load the default boot properties.
property_load_boot_defaults();
// Start the property service.
start_property_service();
// Parse the init.rc file.
init_parse_config_file("/init.rc");
action_for_each_trigger("early-init", action_add_queue_tail);
// Queue an action that waits for coldboot done so we know ueventd has set up all of /dev...
queue_builtin_action(wait_for_coldboot_done_action, "wait_for_coldboot_done");
// ... so that we can start queuing up actions that require stuff from /dev.
queue_builtin_action(mix_hwrng_into_linux_rng_action, "mix_hwrng_into_linux_rng");
queue_builtin_action(keychord_init_action, "keychord_init");
queue_builtin_action(console_init_action, "console_init");
// Trigger all the boot actions to get us started.
action_for_each_trigger("init", action_add_queue_tail);
// Repeat mix_hwrng_into_linux_rng in case /dev/hw_random or /dev/random
// wasn't ready immediately after wait_for_coldboot_done
queue_builtin_action(mix_hwrng_into_linux_rng_action, "mix_hwrng_into_linux_rng");
// Don't mount filesystems or start core system services in charger mode.
char bootmode[PROP_VALUE_MAX];
if (property_get("ro.bootmode", bootmode) > 0 && strcmp(bootmode, "charger") == 0) {
action_for_each_trigger("charger", action_add_queue_tail);
} else if (strncmp(bootmode, "ffbm", 4) == 0) {
KLOG_ERROR("Booting into ffbm mode\n");
action_for_each_trigger("ffbm", action_add_queue_tail);
} else {
action_for_each_trigger("late-init", action_add_queue_tail);
}
// Run all property triggers based on current state of the properties.
queue_builtin_action(queue_property_triggers_action, "queue_property_triggers");
// Main loop; it never terminates.
while (true) {
if (!waiting_for_exec) {
// Execute one queued command, i.e. a command configured in init.rc.
execute_one_command();
// Restart services that have died.
restart_processes();
}
// -1 makes epoll_wait block indefinitely unless one of the cases below applies.
int timeout = -1;
if (process_needs_restart) {
// Wake up when the pending restart is due (the * 1000 presumably converts
// seconds to the milliseconds epoll_wait expects -- confirm against gettime()).
timeout = (process_needs_restart - gettime()) * 1000;
if (timeout < 0)
timeout = 0;
}
// Do not block while there is still queued work.
if (!action_queue_empty() || cur_action) {
timeout = 0;
}
bootchart_sample(&timeout);
epoll_event ev;
int nr = TEMP_FAILURE_RETRY(epoll_wait(epoll_fd, &ev, 1, timeout));
if (nr == -1) {
ERROR("epoll_wait failed: %s\n", strerror(errno));
} else if (nr == 1) {
// Call the function pointer stored in the event's data
// (presumably the handler passed to register_epoll_handler()).
((void (*)()) ev.data.ptr)();
}
}
return 0;
}
上述是init.cpp中main()函数的全部代码,main函数很复杂,主要做了以下几件事:
1.klog_init() 函数将log写到/dev/kmsg中
2.process_kernel_cmdline()解析内核启动参数
3.signal_handler_init() 设置信号处理函数
4.property_load_boot_defaults 导入默认的环境变量
5.property_init() 初始化属性服务和 start_property_service()启动属性服务
6.init_parse_config_file("/init.rc")解析配置文件,使用while循环调用execute_one_command()函数来启动子进程
下面来看一下这几个函数的具体实现:
klog_init() 函数将log写到/dev/kmsg中,对应的代码如下:
/*
 * Opens the kernel log device and stores the descriptor in klog_fd.
 * Does nothing when klog_fd is already valid. If /dev/kmsg cannot be
 * opened, a temporary character node /dev/__kmsg__ is created, opened
 * and unlinked again right away.
 */
void klog_init(void) {
    if (klog_fd >= 0) {
        /* Already initialized */
        return;
    }

    klog_fd = open("/dev/kmsg", O_WRONLY | O_CLOEXEC);
    if (klog_fd < 0) {
        static const char* fallback_node = "/dev/__kmsg__";

        /* Device number is (1 << 8) | 11, i.e. major 1, minor 11. */
        if (mknod(fallback_node, S_IFCHR | 0600, (1 << 8) | 11) == 0) {
            klog_fd = open(fallback_node, O_WRONLY | O_CLOEXEC);
            unlink(fallback_node);
        }
    }
}
klog_init()首先尝试直接打开/dev/kmsg;如果打开失败,则通过mknod创建一个临时的字符设备节点/dev/__kmsg__,打开之后立即unlink。打开得到的文件描述符保存在全局变量klog_fd中。下面的klog_writev()函数会把log信息写到klog_fd文件描述符所代表的文件中:
/*
 * Writes the given iovec array to the kernel log descriptor.
 * Messages whose level is greater than klog_level are dropped. The
 * descriptor is opened on demand through klog_init(); if it still is
 * not available the message is silently discarded.
 */
void klog_writev(int level, const struct iovec* iov, int iov_count) {
    if (level <= klog_level) {
        if (klog_fd < 0) {
            klog_init();
        }
        if (klog_fd >= 0) {
            TEMP_FAILURE_RETRY(writev(klog_fd, iov, iov_count));
        }
    }
}
klog_writev通过writev函数把信息写到klog_fd文件描述符所对应的文件里,也就是/dev/kmsg中。接下来看看init进程是怎么一步一步调用,最后进入klog_writev()函数把log写到/dev/kmsg中的:
// Logging interface of init.
// ERROR / NOTICE / INFO forward their printf-style arguments to
// init_klog_write() together with the matching KLOG_*_LEVEL constant.
#ifndef _INIT_LOG_H_
#define _INIT_LOG_H_
#include <cutils/klog.h>
// Each macro expands to an init_klog_write() call at a fixed log level.
#define ERROR(x...) init_klog_write(KLOG_ERROR_LEVEL, x)
#define NOTICE(x...) init_klog_write(KLOG_NOTICE_LEVEL, x)
#define INFO(x...) init_klog_write(KLOG_INFO_LEVEL, x)
// printf-like logger; the format string is argument 2, the varargs start at 3.
void init_klog_write(int level, const char* fmt, ...) __printflike(2, 3);
// printf-like callback with the same argument layout (by its name intended for
// SELinux logging -- implementation not shown here).
int selinux_klog_callback(int level, const char* fmt, ...) __printflike(2, 3);
#endif
main函数中有大量的ERROR、NOTICE、INFO调用,这三个都是宏,它们都会展开为对init_klog_write函数的调用
// Formats one log record and hands it to klog_writev() as two pieces:
// a "<level>tag: " header followed by the formatted message text.
// The header is limited to 64 bytes and the message to 512 bytes
// (both including the terminating NUL); longer output is truncated.
static void init_klog_vwrite(int level, const char* fmt, va_list ap) {
    // Resolved once on the first call and reused afterwards.
    static const char* tag = basename(getprogname());

    char header[64];
    snprintf(header, sizeof(header), "<%d>%s: ", level, tag);

    char text[512];
    vsnprintf(text, sizeof(text), fmt, ap);

    iovec parts[2];

    parts[0].iov_base = header;
    parts[0].iov_len = strlen(header);

    parts[1].iov_base = text;
    parts[1].iov_len = strlen(text);

    klog_writev(level, parts, 2);
}
上面给出的是init_klog_vwrite函数(应由init_klog_write调用),它调用了klog_writev函数来处理log,这个klog_writev就是上面分析的函数
process_kernel_cmdline() 解析内核启动参数
// Imports the kernel command line through import_kernel_nv.
// The first pass always runs with `false` as first argument; a second pass
// with `true` runs only when qemu[0] is non-zero after the first pass.
static void process_kernel_cmdline(void)
{
    import_kernel_cmdline(false, import_kernel_nv);

    if (qemu[0] != 0) {
        import_kernel_cmdline(true, import_kernel_nv);
    }
}
import_kernel_cmdline函数用来解析内核启动参数,这些内核参数保存在/proc/cmdline里,可以通过adb命令来查看
signal_handler_init() 设置信号处理函数
// Sets up SIGCHLD handling.
// A non-blocking, close-on-exec UNIX socket pair carries the notification:
// SIGCHLD_handler writes to one end, and handle_signal is registered with
// register_epoll_handler() for the other end. Children that are already
// waiting to be reaped are collected before the handler is registered.
// Exits the process if the socket pair cannot be created.
void signal_handler_init() {
    // Create a signalling mechanism for SIGCHLD.
    int fds[2];
    const int rc = socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0, fds);
    if (rc == -1) {
        ERROR("socketpair failed: %s\n", strerror(errno));
        exit(1);
    }

    signal_write_fd = fds[0];  // descriptor the signal handler writes to
    signal_read_fd = fds[1];   // descriptor the notification is read from

    // Write to signal_write_fd if we catch SIGCHLD.
    struct sigaction sa;
    memset(&sa, 0, sizeof(sa));
    sa.sa_flags = SA_NOCLDSTOP;
    sa.sa_handler = SIGCHLD_handler;  // sending side
    sigaction(SIGCHLD, &sa, NULL);

    reap_any_outstanding_children();

    // Receiving side.
    register_epoll_handler(signal_read_fd, handle_signal);
}
// SIGCHLD handler: writes a single byte to signal_write_fd so that the reader
// of the other end of the socket pair is notified.
//
// The handler runs asynchronously, and both write() and the logging path may
// modify errno. errno is therefore saved on entry and restored on exit so that
// the interrupted code never sees a value set in here.
static void SIGCHLD_handler(int) {
    const int saved_errno = errno;

    if (TEMP_FAILURE_RETRY(write(signal_write_fd, "1", 1)) == -1) {
        ERROR("write(signal_write_fd) failed: %s\n", strerror(errno));
    }

    errno = saved_errno;
}
当子进程异常退出的时候,SIGCHLD_handler()函数会被调用,然后往signal_write_fd中写入数据,让接收数据的函数handle_signal()接收,下面来看这个函数
// Handler for the read end of the SIGCHLD socket pair: consumes up to
// 32 pending notification bytes (the result of read() is deliberately
// ignored) and then reaps every child that has exited.
static void handle_signal() {
    // Clear outstanding requests.
    char pending[32];
    read(signal_read_fd, pending, sizeof(pending));

    reap_any_outstanding_children();
}
handle_signal() 函数接收到数据后,会调用reap_any_outstanding_children()继续处理,接下来看看这个函数
// Calls wait_for_one_process() repeatedly until it returns false.
static void reap_any_outstanding_children() {
    for (;;) {
        if (!wait_for_one_process()) {
            return;
        }
    }
}
这个函数只是循环调用wait_for_one_process这个函数来进一步处理:
// Reaps at most one exited child without blocking and updates the bookkeeping
// of the service it belonged to.
// Returns false when no child was reaped (none has exited, or waitpid failed)
// and true when one child was reaped, so the caller can loop until false.
static bool wait_for_one_process() {
int status;
// WNOHANG: return 0 immediately instead of blocking when no child has exited.
pid_t pid = TEMP_FAILURE_RETRY(waitpid(-1, &status, WNOHANG));
if (pid == 0) {
return false;
} else if (pid == -1) {
ERROR("waitpid failed: %s\n", strerror(errno));
return false;
}
// Look up the service that owned the exited pid (e.g. zygote, if zygote died).
service* svc = service_find_by_pid(pid);
std::string name;
if (svc) {
name = android::base::StringPrintf("Service '%s' (pid %d)", svc->name, pid);
} else {
name = android::base::StringPrintf("Untracked pid %d", pid);
}
NOTICE("%s %s\n", name.c_str(), DescribeStatus(status).c_str());
// A pid that does not belong to a known service needs no further handling.
if (!svc) {
return true;
}
// TODO: all the code from here down should be a member function on service.
if (!(svc->flags & SVC_ONESHOT) || (svc->flags & SVC_RESTART)) {
NOTICE("Service '%s' (pid %d) killing any children in process group\n", svc->name, pid);
// The negative pid makes kill() signal the whole process group of the dead service.
kill(-pid, SIGKILL);
// Author's note: if the dead service is zygote, this kills zygote's children
// as well, which is why the Java world goes down when zygote dies -- the
// applications and system_server are all children of zygote.
}
// Remove any sockets we may have created.
for (socketinfo* si = svc->sockets; si; si = si->next) {
char tmp[128];
snprintf(tmp, sizeof(tmp), ANDROID_SOCKET_DIR"/%s", si->name);
unlink(tmp);
}
// An SVC_EXEC service is removed from the service list and freed once it has
// finished; waiting_for_exec is cleared so the main loop runs commands again.
if (svc->flags & SVC_EXEC) {
INFO("SVC_EXEC pid %d finished...\n", svc->pid);
waiting_for_exec = false;
list_remove(&svc->slist);
free(svc->name);
free(svc);
return true;
}
svc->pid = 0;
svc->flags &= (~SVC_RUNNING);
// Oneshot processes go into the disabled state on exit,
// except when manually restarted.
if ((svc->flags & SVC_ONESHOT) && !(svc->flags & SVC_RESTART)) {
svc->flags |= SVC_DISABLED;
}
// Disabled and reset processes do not get restarted automatically.
if (svc->flags & (SVC_DISABLED | SVC_RESET)) {
svc->NotifyStateChange("stopped");
return true;
}
time_t now = gettime();
// Crash accounting for critical services: more than CRITICAL_CRASH_THRESHOLD
// exits within CRITICAL_CRASH_WINDOW triggers a reboot into recovery;
// otherwise a new window is started.
if ((svc->flags & SVC_CRITICAL) && !(svc->flags & SVC_RESTART)) {
if (svc->time_crashed + CRITICAL_CRASH_WINDOW >= now) {
if (++svc->nr_crashed > CRITICAL_CRASH_THRESHOLD) {
ERROR("critical process '%s' exited %d times in %d minutes; "
"rebooting into recovery mode\n", svc->name,
CRITICAL_CRASH_THRESHOLD, CRITICAL_CRASH_WINDOW / 60);
android_reboot(ANDROID_RB_RESTART2, 0, "recovery");
return true;
}
} else {
svc->time_crashed = now;
svc->nr_crashed = 1;
}
}
// Clear SVC_RESTART and mark the service as SVC_RESTARTING; that flag is
// presumably what the later restart logic (restart_processes) keys on.
svc->flags &= (~SVC_RESTART);
svc->flags |= SVC_RESTARTING;
// Execute all onrestart commands for this service.
struct listnode* node;
// Run each "onrestart" command configured for the dead service.
list_for_each(node, &svc->onrestart.commands) {
command* cmd = node_to_item(node, struct command, clist);
cmd->func(cmd->nargs, cmd->args);
}
svc->NotifyStateChange("restarting");
return true;
}
list_for_each(node, &svc->onrestart.commands) {
command* cmd = node_to_item(node, struct command, clist);
cmd->func(cmd->nargs, cmd->args);
}
上面这段代码还没有解释,它的意思是执行死掉的service中的onrestart命令,例如:
service zygote /system/bin/app_process64 -Xzygote /system/bin --zygote --start-system-server
class main
socket zygote stream 660 root system
onrestart write /sys/android_power/request_state wake
onrestart write /sys/power/state on
onrestart restart media
onrestart restart netd
writepid /dev/cpuset/foreground/tasks
那么本身死掉的这个service是在哪里被启动起来的,比如zygote进程死掉后,zygote本身在哪里被启动起来的
int main(int argc,char ** argv){
.....
while (true) {
if (!waiting_for_exec) {
execute_one_command();
//重启死掉的service,如果zygote死掉了,就重启zygote
restart_processes();
}
}
property_load_boot_defaults()导入默认的环境变量
// Loads the boot-time default properties by passing the file named by
// PROP_PATH_RAMDISK_DEFAULT to load_properties_from_file(); NULL is passed
// as the second argument.
void property_load_boot_defaults() {
load_properties_from_file(PROP_PATH_RAMDISK_DEFAULT, NULL);
}
此函数中简单的调用了load_properties_from_file()函数,这个函数中导入了PROP_PATH_RAMDISK_DEFAULT文件的内容,下面查看对应的文件,具体定义如下:
// Loads the boot-time default properties by passing the file named by
// PROP_PATH_RAMDISK_DEFAULT to load_properties_from_file(); NULL is passed
// as the second argument.
void property_load_boot_defaults() {
load_properties_from_file(PROP_PATH_RAMDISK_DEFAULT, NULL);
}
android/bionic/libc/include/sys/_system_properties.h
/* Absolute paths of the property files, one macro per file. */
#define PROP_PATH_RAMDISK_DEFAULT "/default.prop"
#define PROP_PATH_SYSTEM_BUILD "/system/build.prop"
#define PROP_PATH_VENDOR_BUILD "/vendor/build.prop"
#define PROP_PATH_LOCAL_OVERRIDE "/data/local.prop"
#define PROP_PATH_FACTORY "/factory/factory.prop"
从这个定义可知,PROP_PATH_RAMDISK_DEFAULT对应着/default.prop文件,可以使用adb命令cat查看,可以看到默认环境变量的值,最终这些值会被导出来设置到property中,为了验证这一点,可以看一下property中的值是多少。
main中比较重要的函数就介绍完了