参考:https://www.cnblogs.com/coder2012/p/3188355.html
版权声明:本文遵循 CC 4.0 BY-SA 版权协议
1. 主框架
ngx_start_worker_processes() 位于ngx_process_cycle.c中,主要的工作是创建子进程。
在Nginx中,master进程和worker进程之间的通信,是通过socketpair函数创建一对socket来实现。
而这对socket被保存在进程结构体ngx_process_t中的channel[2]数组中。
其中channel[0]为父进程的socket,channel[1]为子进程的socket。
/*
 * Spawn n worker processes and announce each new one to the processes that
 * already exist.
 *
 * cycle: current cycle; its log receives the "start worker processes" notice
 * n:     number of workers to create
 * type:  respawn mode, passed unchanged to ngx_spawn_process()
 *
 * For every worker that was actually created, an NGX_CMD_OPEN_CHANNEL message
 * carrying its pid, its index in ngx_processes[] and the channel[0]
 * descriptor of its socket pair is handed to ngx_pass_open_channel().
 */
static void
ngx_start_worker_processes(ngx_cycle_t *cycle, ngx_int_t n, ngx_int_t type)
{
    ngx_int_t      i;
    ngx_channel_t  ch;

    ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, "start worker processes");

    ngx_memzero(&ch, sizeof(ngx_channel_t));

    /* the same command is reused for every broadcast in the loop below */
    ch.command = NGX_CMD_OPEN_CHANNEL;

    for (i = 0; i < n; i++) {

        if (ngx_spawn_process(cycle, ngx_worker_process_cycle,
                              (void *) (intptr_t) i, "worker process", type)
            == NGX_INVALID_PID)
        {
            /*
             * No process was created, so ngx_processes[ngx_process_slot]
             * does not describe a new worker: the slot is either left over
             * from an earlier spawn or its channel has already been closed.
             * Broadcasting it would hand out stale or invalid descriptors.
             */
            continue;
        }

        /* ngx_process_slot was set by ngx_spawn_process() to the new slot */
        ch.pid = ngx_processes[ngx_process_slot].pid;
        ch.slot = ngx_process_slot;

        /* channel[0] is the first descriptor of the worker's socketpair() */
        ch.fd = ngx_processes[ngx_process_slot].channel[0];

        /* tell the other live processes about the newly created worker */
        ngx_pass_open_channel(cycle, &ch);
    }
}
在分析ngx_spawn_process()创建新进程之前,先了解下进程标识符(通俗点说就是进程挂了需不需要重启)
在源码的ngx_process.h中,有以下几种属性标识:
- NGX_PROCESS_NORESPAWN :子进程退出时,父进程不会再次重启
- NGX_PROCESS_JUST_SPAWN :与NGX_PROCESS_NORESPAWN相同(respawn = 0,子进程退出后不重启),但会把新进程标记为"刚创建"(just_spawn = 1)
- NGX_PROCESS_RESPAWN :子进程异常退出时,父进程需要重启
- NGX_PROCESS_JUST_RESPAWN :与NGX_PROCESS_RESPAWN相同(respawn = 1,子进程退出后需要重启),但会把新进程标记为"刚创建"(just_spawn = 1)
- NGX_PROCESS_DETACHED :热代码替换,暂时估计是用于在不重启Nginx的情况下进行软件升级
关于JUST_SPAWN / JUST_RESPAWN的含义?
- 这个是关于Nginx配置重载的内容。当Master进程检测到需要进程重载时,会进行ngx_init_cycle(),然后启动新的worker进程。执行:
ngx_start_worker_processes(cycle, ccf->worker_processes, NGX_PROCESS_JUST_RESPAWN);
NGX_PROCESS_JUST_RESPAWN标识最终会在ngx_spawn_process()创建worker进程时,将ngx_processes[s].just_spawn置为1,以此作为区别于旧worker进程的标记。
2. 创建worker进程函数
ngx_spawn_process() (有点长,注意看中文注释)
/*
 * Create a child process and record it in the global ngx_processes[] table.
 *
 * Parameters:
 *   cycle:   pointer to the ngx_cycle_t in use; its log receives every
 *            message emitted here
 *   proc:    entry point invoked in the child right after fork()
 *   data:    opaque argument handed to proc
 *   name:    process name; used in log messages and stored in the slot
 *   respawn: when >= 0, the index of the ngx_processes[] slot to reuse;
 *            otherwise one of the NGX_PROCESS_* modes handled by the switch
 *            at the end of the function (presumably all negative constants,
 *            since non-negative values return before the switch - confirm
 *            against the header)
 *
 * Returns the pid returned by fork(), or NGX_INVALID_PID on any failure.
 */
ngx_pid_t
ngx_spawn_process(ngx_cycle_t *cycle, ngx_spawn_proc_pt proc, void *data,
char *name, ngx_int_t respawn)
{
u_long on;
ngx_pid_t pid;
ngx_int_t s; // index of the ngx_processes[] slot used for the new process
if (respawn >= 0) {
s = respawn; // a non-negative respawn is itself the slot index to reuse
} else {
/* look for a free slot (pid == -1) among the slots used so far */
for (s = 0; s < ngx_last_process; s++) {
if (ngx_processes[s].pid == -1) {
break;
}
}
/* no free slot and the table is already at its maximum size: alert and fail */
if (s == NGX_MAX_PROCESSES) {
ngx_log_error(NGX_LOG_ALERT, cycle->log, 0,
"no more than %d processes can be spawned",
NGX_MAX_PROCESSES);
return NGX_INVALID_PID;
}
}
if (respawn != NGX_PROCESS_DETACHED) {
/* every mode except NGX_PROCESS_DETACHED gets a channel */
/* Solaris 9 still has no AF_LOCAL */
/* create the pair of connected sockets stored in this slot's channel[] */
if (socketpair(AF_UNIX, SOCK_STREAM, 0, ngx_processes[s].channel) == -1)
{
ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
"socketpair() failed while spawning \"%s\"", name);
return NGX_INVALID_PID;
}
ngx_log_debug2(NGX_LOG_DEBUG_CORE, cycle->log, 0,
"channel %d:%d",
ngx_processes[s].channel[0],
ngx_processes[s].channel[1]);
/* put both ends of the pair, channel[0] and channel[1], into non-blocking mode */
if (ngx_nonblocking(ngx_processes[s].channel[0]) == -1) {
ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
ngx_nonblocking_n " failed while spawning \"%s\"",
name);
ngx_close_channel(ngx_processes[s].channel, cycle->log);
return NGX_INVALID_PID;
}
if (ngx_nonblocking(ngx_processes[s].channel[1]) == -1) {
ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
ngx_nonblocking_n " failed while spawning \"%s\"",
name);
ngx_close_channel(ngx_processes[s].channel, cycle->log);
return NGX_INVALID_PID;
}
/* Asynchronous mode:
 * see the ioctl()/fcntl() material in a network programming reference
 */
on = 1; // ioctl argument: zero clears the option, non-zero sets it
/* Enable signal-driven asynchronous I/O on channel[0].
 * FIOASYNC controls whether SIGIO is generated for the socket; it is
 * equivalent to the O_ASYNC file status flag set through fcntl(F_SETFL).
 */
if (ioctl(ngx_processes[s].channel[0], FIOASYNC, &on) == -1) {
ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
"ioctl(FIOASYNC) failed while spawning \"%s\"", name);
ngx_close_channel(ngx_processes[s].channel, cycle->log);
return NGX_INVALID_PID;
}
/* F_SETOWN selects the process (or process group) that receives SIGIO and
 * SIGURG for the socket; here the owner is ngx_pid, the calling process.
 * SIGIO is only generated once signal-driven I/O is enabled (previous step);
 * SIGURG is generated when out-of-band data arrives on the socket.
 */
if (fcntl(ngx_processes[s].channel[0], F_SETOWN, ngx_pid) == -1) {
ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
"fcntl(F_SETOWN) failed while spawning \"%s\"", name);
ngx_close_channel(ngx_processes[s].channel, cycle->log);
return NGX_INVALID_PID;
}
/* FD_CLOEXEC sets the close-on-exec flag of a descriptor.
 * The flag is clear by default, which keeps the descriptor open across
 * exec(); once set, the descriptor is closed when exec() is called.
 * Here channel[0] is marked close-on-exec.
 */
if (fcntl(ngx_processes[s].channel[0], F_SETFD, FD_CLOEXEC) == -1) {
ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
"fcntl(FD_CLOEXEC) failed while spawning \"%s\"",
name);
ngx_close_channel(ngx_processes[s].channel, cycle->log);
return NGX_INVALID_PID;
}
/* same as above, for channel[1] */
if (fcntl(ngx_processes[s].channel[1], F_SETFD, FD_CLOEXEC) == -1) {
ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
"fcntl(FD_CLOEXEC) failed while spawning \"%s\"",
name);
ngx_close_channel(ngx_processes[s].channel, cycle->log);
return NGX_INVALID_PID;
}
/* Store channel[1] in the global ngx_channel before fork(), so the child
 * starts with this value.
 * NOTE(review): presumably the child uses it as its own end of the pair -
 * confirm in the worker code.
 */
ngx_channel = ngx_processes[s].channel[1];
} else {
ngx_processes[s].channel[0] = -1;
ngx_processes[s].channel[1] = -1;
}
ngx_process_slot = s; // publish the slot index; ngx_pass_open_channel() and the caller read it
pid = fork(); // from here on the code runs in both parent and child
switch (pid) {
case -1:
ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
"fork() failed while spawning \"%s\"", name);
ngx_close_channel(ngx_processes[s].channel, cycle->log);
return NGX_INVALID_PID;
case 0:
ngx_pid = ngx_getpid(); // child: refresh the cached pid
proc(cycle, data); // child: run the entry point (ngx_worker_process_cycle for workers)
break;
default:
break;
}
ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, "start %s %P", name, pid);
/* fill in the ngx_processes[] entry for the new process */
ngx_processes[s].pid = pid;
ngx_processes[s].exited = 0;
/* respawn >= 0 means an existing slot was reused, so the fields below are not set again */
if (respawn >= 0) {
return pid;
}
ngx_processes[s].proc = proc;
ngx_processes[s].data = data;
ngx_processes[s].name = name;
ngx_processes[s].exiting = 0;
/* translate the spawn mode into the respawn / just_spawn / detached flags;
 * there is no default case, so any other value leaves the flags untouched
 */
switch (respawn) {
case NGX_PROCESS_NORESPAWN:
ngx_processes[s].respawn = 0;
ngx_processes[s].just_spawn = 0;
ngx_processes[s].detached = 0;
break;
case NGX_PROCESS_JUST_SPAWN:
ngx_processes[s].respawn = 0;
ngx_processes[s].just_spawn = 1;
ngx_processes[s].detached = 0;
break;
case NGX_PROCESS_RESPAWN:
ngx_processes[s].respawn = 1;
ngx_processes[s].just_spawn = 0;
ngx_processes[s].detached = 0;
break;
case NGX_PROCESS_JUST_RESPAWN:
ngx_processes[s].respawn = 1;
ngx_processes[s].just_spawn = 1;
ngx_processes[s].detached = 0;
break;
case NGX_PROCESS_DETACHED:
ngx_processes[s].respawn = 0;
ngx_processes[s].just_spawn = 0;
ngx_processes[s].detached = 1;
break;
}
/* the new process took the slot just past the used range: extend the range */
if (s == ngx_last_process) {
ngx_last_process++;
}
return pid;
}
3.向其他进程广播
Nginx是如何在进程间进行通信的呢,我们来看ngx_pass_open_channel函数:
/*
 * Forward the channel message *ch to every other live process.
 *
 * cycle: current cycle; its log is used for the debug line and passed on to
 *        ngx_write_channel()
 * ch:    message to send; its slot, pid and fd fields are also logged
 *
 * Walks ngx_processes[0 .. ngx_last_process) and writes the message to
 * channel[0] of each entry, except the entry at ngx_process_slot, entries
 * whose pid is -1, and entries whose channel[0] is -1.
 */
static void
ngx_pass_open_channel(ngx_cycle_t *cycle, ngx_channel_t *ch)
{
    ngx_int_t  slot;

    slot = 0;

    while (slot < ngx_last_process) {

        /* only slots that are not the current one, are in use and have a channel */
        if (slot != ngx_process_slot
            && ngx_processes[slot].pid != -1
            && ngx_processes[slot].channel[0] != -1)
        {
            ngx_log_debug6(NGX_LOG_DEBUG_CORE, cycle->log, 0,
                           "pass channel s:%d pid:%P fd:%d to s:%i pid:%P fd:%d",
                           ch->slot, ch->pid, ch->fd,
                           slot, ngx_processes[slot].pid,
                           ngx_processes[slot].channel[0]);

            /* TODO: NGX_AGAIN */

            /* the result is deliberately not inspected, as in the original */
            ngx_write_channel(ngx_processes[slot].channel[0],
                              ch, sizeof(ngx_channel_t), cycle->log);
        }

        slot++;
    }
}
其中ngx_write_channel的实现:
/*
 * Send one channel message over socket s with sendmsg().
 *
 * s:    socket to write to (ngx_pass_open_channel() passes the channel[0]
 *       descriptor of the destination process)
 * ch:   message to send; when ch->fd is not -1 that descriptor is attached
 *       to the message as ancillary data
 * size: number of bytes of *ch to transmit
 * log:  log used when sendmsg() fails
 *
 * Returns NGX_OK when sendmsg() succeeds, NGX_AGAIN when it fails with
 * NGX_EAGAIN, and NGX_ERROR (after logging an alert) on any other failure.
 */
ngx_int_t
ngx_write_channel(ngx_socket_t s, ngx_channel_t *ch, size_t size,
    ngx_log_t *log)
{
    ssize_t             n;
    ngx_err_t           err;
    struct iovec        iov[1];
    struct msghdr       msg;

#if (NGX_HAVE_MSGHDR_MSG_CONTROL)

    union {
        struct cmsghdr  cm;
        char            space[CMSG_SPACE(sizeof(int))];
    } cmsg;

    if (ch->fd == -1) {
        /* no descriptor to pass: send the payload without ancillary data */
        msg.msg_control = NULL;
        msg.msg_controllen = 0;

    } else {
        msg.msg_control = (caddr_t) &cmsg;
        msg.msg_controllen = sizeof(cmsg);

        /*
         * The whole union, including any padding between CMSG_LEN() and
         * CMSG_SPACE(), is handed to sendmsg(); zero it first so that no
         * uninitialized stack bytes are passed along.
         */
        ngx_memzero(&cmsg, sizeof(cmsg));

        cmsg.cm.cmsg_len = CMSG_LEN(sizeof(int));
        cmsg.cm.cmsg_level = SOL_SOCKET;
        cmsg.cm.cmsg_type = SCM_RIGHTS;

        /*
         * We have to use ngx_memcpy() instead of simple
         *   *(int *) CMSG_DATA(&cmsg.cm) = ch->fd;
         * because some gcc 4.4 with -O2/3/s optimization issues the warning:
         *   dereferencing type-punned pointer will break strict-aliasing rules
         *
         * Fortunately, gcc with -O1 compiles this ngx_memcpy()
         * in the same simple assignment as in the code above
         */

        ngx_memcpy(CMSG_DATA(&cmsg.cm), &ch->fd, sizeof(int));
    }

    msg.msg_flags = 0;

#else

    /* older interface: the descriptor travels in msg_accrights */
    if (ch->fd == -1) {
        msg.msg_accrights = NULL;
        msg.msg_accrightslen = 0;

    } else {
        msg.msg_accrights = (caddr_t) &ch->fd;
        msg.msg_accrightslen = sizeof(int);
    }

#endif

    /* the payload is the message structure itself */
    iov[0].iov_base = (char *) ch;
    iov[0].iov_len = size;

    msg.msg_name = NULL;
    msg.msg_namelen = 0;
    msg.msg_iov = iov;
    msg.msg_iovlen = 1;

    n = sendmsg(s, &msg, 0);

    if (n == -1) {
        err = ngx_errno;

        /* the socket is non-blocking: let the caller decide how to retry */
        if (err == NGX_EAGAIN) {
            return NGX_AGAIN;
        }

        ngx_log_error(NGX_LOG_ALERT, log, err, "sendmsg() failed");
        return NGX_ERROR;
    }

    return NGX_OK;
}
4. 关闭旧的worker进程
执行:ngx_signal_worker_processes() 函数
ngx_signal_worker_processes(cycle, ngx_signal_value(NGX_SHUTDOWN_SIGNAL));
以此关闭旧的worker进程。进入该函数,你会发现它也是循环向所有worker进程发送信号,所以它会先把旧worker进程关闭,然后再管理新的worker进程。代码体现:
// 1. Send signal signo to the process in slot i (excerpt from the loop body
//    of ngx_signal_worker_processes(); the enclosing loop is not shown).
if (kill(ngx_processes[i].pid, signo) == -1) {
err = ngx_errno;
ngx_log_error(NGX_LOG_ALERT, cycle->log, err,
"kill(%P, %d) failed", ngx_processes[i].pid, signo);
// NGX_ESRCH: record the slot as exited and set ngx_reap
if (err == NGX_ESRCH) {
ngx_processes[i].exited = 1;
ngx_processes[i].exiting = 0;
ngx_reap = 1;
}
continue;
}
// 2. Clear the just_spawn mark of a freshly created process.
// NOTE(review): in this excerpt kill() comes before the just_spawn check, so
// a just-spawned process would be signalled as well - verify the statement
// order against the upstream source.
if (ngx_processes[i].just_spawn) {
ngx_processes[i].just_spawn = 0;
continue;
}
// 3. Reload finished.