android—init进程如何重启service

《android—init.rc的读取》中介绍过,init进程会启动很多native的service,这些service如果不是oneshot的,当service出现异常挂掉后,init需要将其重新启动起来,那么具体是如何操作的?其实主要是借助了信号和socket来实现。

在init的main()函数中,首先进行了signal相关的初始化,设置了init对SIGCHLD(native的service都是在init中通过fork新建的子进程,子进程挂掉后会给init发送SIGCHLD信号)的信号处理函数,

queue_builtin_action(signal_init_action, "signal_init");

/*
 * Action-style wrapper around signal_init().
 *
 * nargs and args are accepted only to fit the (nargs, args) callback
 * signature and are not used. Always returns 0.
 */
static int signal_init_action(int nargs, char **args)
{
    (void)nargs;
    (void)args;

    signal_init();

    return 0;
}

void signal_init(void)
{
    int s[2];

    struct sigaction act;
    memset(&act, 0, sizeof(act));
    act.sa_handler = sigchld_handler;
    act.sa_flags = SA_NOCLDSTOP;
    sigaction(SIGCHLD, &act, 0);

    //创建一个socketpair,一个读fd一个写fd
    /* create a signalling mechanism for the sigchld handler */
    if (socketpair(AF_UNIX, SOCK_STREAM, 0, s) == 0) {
        signal_fd = s[0];
        signal_recv_fd = s[1];
        fcntl(s[0], F_SETFD, FD_CLOEXEC);
        fcntl(s[0], F_SETFL, O_NONBLOCK);
        fcntl(s[1], F_SETFD, FD_CLOEXEC);
        fcntl(s[1], F_SETFL, O_NONBLOCK);
    }

    handle_signal();
}

SIGCHLD的信号处理函数sigchld_handler(),就是向signal_fd中写数据,这时候socketpair的另一端signal_recv_fd就会收到数据。那么,init肯定在某处对这个signal_recv_fd进行了poll。

/*
 * SIGCHLD handler.
 *
 * Writes a single byte to signal_fd; the value of the byte does not
 * matter. The other end of the socket pair (signal_recv_fd) becomes
 * readable as a result, which is how the main loop learns that a child
 * has exited.
 *
 * s: the signal number delivered to the handler; only its first byte is
 *    used, as the payload of the write.
 */
static void sigchld_handler(int s)
{
    /*
     * write() may modify errno, and this handler can interrupt code that is
     * about to inspect errno (for example a waitpid()/EINTR retry loop), so
     * the caller's value is saved here and restored before returning.
     */
    int saved_errno = errno;

    /*
     * Best effort: the result is deliberately ignored because there is
     * nothing safe to do about a failed write from inside a signal handler.
     */
    (void)write(signal_fd, &s, 1);

    errno = saved_errno;
}

在init的for循环中,发现确实对signal_recv_fd进行了poll,

        /*
         * One-time registration of the signal receive descriptor in the
         * poll set; signal_fd_init records that this has been done.
         */
        if (!signal_fd_init && get_signal_fd() > 0) {
            // After a child process dies, the SIGCHLD handler writes to signal_fd.
            // The other end of the socketpair, signal_recv_fd, then receives the
            // data, so that end is added to the descriptors being polled here.
            ufds[fd_count].fd = get_signal_fd();
            ufds[fd_count].events = POLLIN;
            ufds[fd_count].revents = 0;
            fd_count++;
            signal_fd_init = 1;
        }

/*
 * Return signal_recv_fd, the end of the SIGCHLD notification socket pair
 * that handle_signal() reads from and that the main loop polls.
 *
 * Declared with (void) so the definition is a real prototype and calls
 * with arguments are rejected by the compiler.
 */
int get_signal_fd(void)
{
    return signal_recv_fd;
}

当signal_recv_fd描述符poll触发返回时,执行handle_signal()函数,

        /* Dispatch every polled descriptor that has input pending. */
        for (i = 0; i < fd_count; i++) {
            /*
             * Test the POLLIN bit rather than comparing for equality:
             * poll() may report POLLIN together with other bits (such as
             * POLLHUP), and an exact comparison would then skip a
             * descriptor that does have data to read.
             */
            if (!(ufds[i].revents & POLLIN))
                continue;

            if (ufds[i].fd == get_property_set_fd())
                handle_property_set_fd();
            else if (ufds[i].fd == get_keychord_fd())
                handle_keychord();
            else if (ufds[i].fd == get_signal_fd())
                /* a child exited: drain the notification and reap it */
                handle_signal();
        }

而,

/*
 * React to a SIGCHLD notification: consume the pending bytes from
 * signal_recv_fd, then call wait_for_one_process(0) repeatedly until it
 * returns a non-zero value.
 */
void handle_signal(void)
{
    char drain[32];

    /* we got a SIGCHLD - reap and restart as needed */
    read(signal_recv_fd, drain, sizeof(drain));

    for (;;) {
        if (wait_for_one_process(0) != 0)
            break;
    }
}

wait_for_one_process的主要功能是等待子进程退出,然后将需要重启的service的状态设置为SVC_RESTARTING。同时需要注意,这里还会执行该service的onrestart选项中配置的命令,通常就是去重启与之相关的其他service:

/*
 * Reap one exited child and update the state of the service it belonged to.
 *
 * block: non-zero makes waitpid() block until a child exits; zero makes it
 *        poll with WNOHANG.
 *
 * Returns -1 when waitpid() yields no child (pid <= 0) and 0 in every other
 * case, including a pid that matches no known service. handle_signal() keeps
 * calling this function while it returns 0, so the -1 is what ends that loop.
 *
 * The service itself is not restarted here: it is only flagged
 * SVC_RESTARTING and its onrestart commands are executed.
 */
static int wait_for_one_process(int block)
{
    pid_t pid;
    int status;
    struct service *svc;
    struct socketinfo *si;
    time_t now;
    struct listnode *node;
    struct command *cmd;

    // Collect an exited child (any child, pid -1), retrying if waitpid() is
    // interrupted by a signal (EINTR). The pid is then used to look up the
    // owning service.

    while ( (pid = waitpid(-1, &status, block ? 0 : WNOHANG)) == -1 && errno == EINTR );
    if (pid <= 0) return -1;
    INFO("waitpid returned pid %d, status = %08x\n", pid, status);

    svc = service_find_by_pid(pid);
    if (!svc) {
        ERROR("untracked pid %d exited\n", pid);
        return 0;
    }

    NOTICE("process '%s', pid %d exited\n", svc->name, pid);

    // For a service that is not oneshot, or that has SVC_RESTART set, send
    // SIGKILL to the process group whose id is the dead child's pid
    // (negative pid argument to kill()).
    if (!(svc->flags & SVC_ONESHOT) || (svc->flags & SVC_RESTART)) {
        kill(-pid, SIGKILL);
        NOTICE("process '%s' killing any children in process group\n", svc->name);
    }

    /* remove any sockets we may have created */
    for (si = svc->sockets; si; si = si->next) {
        char tmp[128];
        snprintf(tmp, sizeof(tmp), ANDROID_SOCKET_DIR"/%s", si->name);
        unlink(tmp);
    }

    /* the process is gone: forget its pid and clear the running flag */
    svc->pid = 0;
    svc->flags &= (~SVC_RUNNING);

        /* oneshot processes go into the disabled state on exit,
         * except when manually restarted. */
    if ((svc->flags & SVC_ONESHOT) && !(svc->flags & SVC_RESTART)) {
        svc->flags |= SVC_DISABLED;
    }

        /* disabled and reset processes do not get restarted automatically */
    if (svc->flags & (SVC_DISABLED | SVC_RESET) )  {
        notify_service_state(svc->name, "stopped");
        return 0;
    }

    now = gettime();
    // Crash accounting for critical services that do not have SVC_RESTART
    // set: exits falling within CRITICAL_CRASH_WINDOW of time_crashed are
    // counted, and once the count exceeds CRITICAL_CRASH_THRESHOLD the device
    // is rebooted into recovery. An exit outside the window starts a new
    // window with a count of 1.
    // NOTE(review): the original note here mentions 4 crashes within 4
    // minutes; both constants are defined outside this block - confirm.
    if ((svc->flags & SVC_CRITICAL) && !(svc->flags & SVC_RESTART)) {
        if (svc->time_crashed + CRITICAL_CRASH_WINDOW >= now) {
            if (++svc->nr_crashed > CRITICAL_CRASH_THRESHOLD) {
                ERROR("critical process '%s' exited %d times in %d minutes; "
                      "rebooting into recovery mode\n", svc->name,
                      CRITICAL_CRASH_THRESHOLD, CRITICAL_CRASH_WINDOW / 60);
                android_reboot(ANDROID_RB_RESTART2, 0, "recovery");
                return 0;
            }
        } else {
            svc->time_crashed = now;
            svc->nr_crashed = 1;
        }
    }

    /* mark the service as pending restart; the restart happens elsewhere */
    svc->flags &= (~SVC_RESTART);
    svc->flags |= SVC_RESTARTING;

    /* Execute all onrestart commands for this service. */
    // Each command attached to this service's onrestart action is run in
    // list order.
    list_for_each(node, &svc->onrestart.commands) {
        cmd = node_to_item(node, struct command, clist);
        cmd->func(cmd->nargs, cmd->args);
    }
    notify_service_state(svc->name, "restarting");
    return 0;
}

上面只是设置了service的状态为SVC_RESTARTING,真正重启的地方在main函数中的for循环的restart_processes(),

restart_processes();

/*
 * Give every service currently flagged SVC_RESTARTING a chance to restart.
 *
 * process_needs_restart is cleared first; restart_service_if_needed() sets
 * it again for services whose restart is not yet due.
 *
 * Declared with (void) so the definition is a real prototype and calls
 * with arguments are rejected by the compiler.
 */
static void restart_processes(void)
{
    process_needs_restart = 0;
    /* only services whose flags include SVC_RESTARTING are visited */
    service_for_each_flags(SVC_RESTARTING,
                           restart_service_if_needed);
}

/*
 * Call func on every service in service_list whose flags share at least
 * one bit with matchflags.
 *
 * matchflags: bit mask tested against each service's flags.
 * func:       callback invoked with each matching service.
 */
void service_for_each_flags(unsigned matchflags,
                            void (*func)(struct service *svc))
{
    struct listnode *cursor;

    list_for_each(cursor, &service_list) {
        struct service *candidate = node_to_item(cursor, struct service, slist);

        /* skip services carrying none of the requested flags */
        if (!(candidate->flags & matchflags))
            continue;

        func(candidate);
    }
}
/*
 * Restart svc if its earliest allowed start time (time_started + 5, in the
 * units returned by gettime(), presumably seconds) has been reached.
 *
 * If the restart is not due yet, the service keeps its SVC_RESTARTING flag
 * and process_needs_restart is lowered to this service's start time when it
 * is unset (0) or later than that time.
 */
static void restart_service_if_needed(struct service *svc)
{
    time_t earliest_start = svc->time_started + 5;

    if (earliest_start > gettime()) {
        /* not due yet: remember the soonest pending restart time */
        if (process_needs_restart == 0 ||
            earliest_start < process_needs_restart) {
            process_needs_restart = earliest_start;
        }
        return;
    }

    /* due now: clear the pending flag and start the service again */
    svc->flags &= (~SVC_RESTARTING);
    service_start(svc, NULL);
}

至此,init就完成了对service的重启,保证了某些关键service一直运行。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值