walreceiver 进程启动时会执行一个函数,这个函数是 walreceiver 进程的主入口:
WalReceiverMain()
代码位置:
src/backend/replication/walreceiver.c
下面我们看一下主入口函数:
/*
 * Main entry point for walreceiver process.
 *
 * As shown here, the function marks the walreceiver as running in shared
 * memory (walrcv), installs signal handlers, loads libpqwalreceiver and
 * connects to the primary.  It then loops: identify the primary, fetch
 * timeline history files, stream WAL until the stream ends, close the
 * current segment file, and wait for a new start position.  The loop has no
 * normal exit; the function leaves only via error reports or exit().
 *
 * NOTE: lines consisting of "......" mark code omitted from this excerpt
 * (the local variable declarations and the bodies of the first switch cases
 * are not visible here).
 */
void
WalReceiverMain(void)
{
......
/* walrcv should already be set up */
Assert(walrcv != NULL);
now = GetCurrentTimestamp();
/*
 * Mark walreceiver as running in shared memory.
 *
 * Do this as early as possible, so that if we fail later on, we'll set
 * state to STOPPED. If we die before this, the startup process will keep
 * waiting for us to start up.
 */
SpinLockAcquire(&walrcv->mutex);
Assert(walrcv->pid == 0);
switch (walrcv->walRcvState)
{
case WALRCV_STOPPING:
......
case WALRCV_STOPPED:
......
case WALRCV_STARTING:
......
case WALRCV_WAITING:
case WALRCV_STREAMING:
case WALRCV_RESTARTING:
default:
/* Shouldn't happen */
SpinLockRelease(&walrcv->mutex);
elog(PANIC, "walreceiver still running according to shared memory state");
}
/* Advertise our PID so that the startup process can kill us */
walrcv->pid = MyProcPid;
walrcv->walRcvState = WALRCV_STREAMING;
/* Fetch information required to start streaming */
walrcv->ready_to_display = false;
strlcpy(conninfo, (char *) walrcv->conninfo, MAXCONNINFO);
strlcpy(slotname, (char *) walrcv->slotname, NAMEDATALEN);
startpoint = walrcv->receiveStart;
startpointTLI = walrcv->receiveStartTLI;
/* Initialise to a sanish value */
walrcv->lastMsgSendTime =
walrcv->lastMsgReceiptTime = walrcv->latestWalEndTime = now;
/* Report the latch to use to awaken this process */
walrcv->latch = &MyProc->procLatch;
SpinLockRelease(&walrcv->mutex);
/* Arrange to clean up at walreceiver exit (WalRcvDie is registered as an on_shmem_exit callback) */
on_shmem_exit(WalRcvDie, 0);
/* Properly accept or ignore signals the postmaster might send us */
pqsignal(SIGHUP, WalRcvSigHupHandler); /* set flag to read config file */
pqsignal(SIGINT, SIG_IGN);
pqsignal(SIGTERM, WalRcvShutdownHandler); /* request shutdown */
pqsignal(SIGQUIT, WalRcvQuickDieHandler); /* hard crash time */
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, WalRcvSigUsr1Handler);
pqsignal(SIGUSR2, SIG_IGN);
/* Reset signals that are accepted by the postmaster to their default handling */
pqsignal(SIGCHLD, SIG_DFL);
pqsignal(SIGTTIN, SIG_DFL);
pqsignal(SIGTTOU, SIG_DFL);
pqsignal(SIGCONT, SIG_DFL);
pqsignal(SIGWINCH, SIG_DFL);
/* We allow SIGQUIT (quickdie) at all times */
sigdelset(&BlockSig, SIGQUIT);
/* Load the libpq-specific functions */
load_file("libpqwalreceiver", false);
if (WalReceiverFunctions == NULL)
elog(ERROR, "libpqwalreceiver didn't initialize correctly");
/*
 * Create a resource owner to keep track of our resources (not clear that
 * we need this, but may as well have one).
 */
CurrentResourceOwner = ResourceOwnerCreate(NULL, "Wal Receiver");
/* Unblock signals (they were blocked when the postmaster forked us) */
PG_SETMASK(&UnBlockSig);
/* Establish the connection to the primary for XLOG streaming */
EnableWalRcvImmediateExit();
wrconn = walrcv_connect(conninfo, false, "walreceiver", &err);
if (!wrconn)
ereport(ERROR,
(errmsg("could not connect to the primary server: %s", err)));
DisableWalRcvImmediateExit();
/*
 * Save user-visible connection string.  This clobbers the original
 * conninfo, for security. Also save host and port of the sender server
 * this walreceiver is connected to.
 */
tmp_conninfo = walrcv_get_conninfo(wrconn);
walrcv_get_senderinfo(wrconn, &sender_host, &sender_port);
SpinLockAcquire(&walrcv->mutex);
memset(walrcv->conninfo, 0, MAXCONNINFO);
if (tmp_conninfo)
strlcpy((char *) walrcv->conninfo, tmp_conninfo, MAXCONNINFO);
memset(walrcv->sender_host, 0, NI_MAXHOST);
if (sender_host)
strlcpy((char *) walrcv->sender_host, sender_host, NI_MAXHOST);
walrcv->sender_port = sender_port;
walrcv->ready_to_display = true;
SpinLockRelease(&walrcv->mutex);
/* The temporary copies are no longer needed once stored in shared memory */
if (tmp_conninfo)
pfree(tmp_conninfo);
if (sender_host)
pfree(sender_host);
first_stream = true;
/* Outer loop: one iteration per streaming attempt from a given start point */
for (;;)
{
char *primary_sysid;
char standby_sysid[32];
int server_version;
WalRcvStreamOptions options;
/*
 * Check that we're connected to a valid server using the
 * IDENTIFY_SYSTEM replication command.
 */
EnableWalRcvImmediateExit();
primary_sysid = walrcv_identify_system(wrconn, &primaryTLI,
&server_version);
snprintf(standby_sysid, sizeof(standby_sysid), UINT64_FORMAT,
GetSystemIdentifier());
if (strcmp(primary_sysid, standby_sysid) != 0)
{
ereport(ERROR,
(errmsg("database system identifier differs between the primary and standby"),
errdetail("The primary's identifier is %s, the standby's identifier is %s.",
primary_sysid, standby_sysid)));
}
DisableWalRcvImmediateExit();
/*
 * Confirm that the current timeline of the primary is the same or
 * ahead of ours.
 */
if (primaryTLI < startpointTLI)
ereport(ERROR,
(errmsg("highest timeline %u of the primary is behind recovery timeline %u",
primaryTLI, startpointTLI)));
/*
 * Get any missing history files. We do this always, even when we're
 * not interested in that timeline, so that if we're promoted to
 * become the master later on, we don't select the same timeline that
 * was already used in the current master. This isn't bullet-proof -
 * you'll need some external software to manage your cluster if you
 * need to ensure that a unique timeline id is chosen in every case,
 * but let's avoid the confusion of timeline id collisions where we
 * can.
 */
WalRcvFetchTimeLineHistoryFiles(startpointTLI, primaryTLI);
/*
 * Start streaming.
 *
 * We'll try to start at the requested starting point and timeline,
 * even if it's different from the server's latest timeline. In case
 * we've already reached the end of the old timeline, the server will
 * finish the streaming immediately, and we will go back to await
 * orders from the startup process. If recovery_target_timeline is
 * 'latest', the startup process will scan pg_wal and find the new
 * history file, bump recovery target timeline, and ask us to restart
 * on the new timeline.
 */
options.logical = false;
options.startpoint = startpoint;
/* An empty slot name is passed on as NULL */
options.slotname = slotname[0] != '\0' ? slotname : NULL;
options.proto.physical.startpointTLI = startpointTLI;
ThisTimeLineID = startpointTLI;
if (walrcv_startstreaming(wrconn, &options))
{
if (first_stream)
ereport(LOG,
(errmsg("started streaming WAL from primary at %X/%X on timeline %u",
(uint32) (startpoint >> 32), (uint32) startpoint,
startpointTLI)));
else
ereport(LOG,
(errmsg("restarted WAL streaming at %X/%X on timeline %u",
(uint32) (startpoint >> 32), (uint32) startpoint,
startpointTLI)));
first_stream = false;
/* Initialize LogstreamResult and buffers for processing messages */
LogstreamResult.Write = LogstreamResult.Flush = GetXLogReplayRecPtr(NULL);
initStringInfo(&reply_message);
initStringInfo(&incoming_message);
/* Initialize the last recv timestamp */
last_recv_timestamp = GetCurrentTimestamp();
ping_sent = false;
/* Loop until end-of-streaming or error */
for (;;)
{
char *buf;
int len;
bool endofwal = false;
pgsocket wait_fd = PGINVALID_SOCKET;
int rc;
/*
 * Exit walreceiver if we're not in recovery. This should not
 * happen, but cross-check the status here.
 */
if (!RecoveryInProgress())
ereport(FATAL,
(errmsg("cannot continue WAL streaming, recovery has already ended")));
/* Process any requests or signals received recently */
ProcessWalRcvInterrupts();
if (got_SIGHUP)
{
got_SIGHUP = false;
ProcessConfigFile(PGC_SIGHUP);
XLogWalRcvSendHSFeedback(true);
}
/* See if we can read data immediately */
len = walrcv_receive(wrconn, &buf, &wait_fd);
if (len != 0)
{
/*
 * Process the received data, and any subsequent data we
 * can read without blocking.
 */
for (;;)
{
if (len > 0)
{
/*
 * Something was received from master, so reset
 * timeout
 */
last_recv_timestamp = GetCurrentTimestamp();
ping_sent = false;
/* First byte of the buffer is passed separately from the remaining len - 1 bytes */
XLogWalRcvProcessMsg(buf[0], &buf[1], len - 1);
}
else if (len == 0)
break;
else if (len < 0)
{
/* A negative length is reported as end of WAL on this timeline */
ereport(LOG,
(errmsg("replication terminated by primary server"),
errdetail("End of WAL reached on timeline %u at %X/%X.",
startpointTLI,
(uint32) (LogstreamResult.Write >> 32), (uint32) LogstreamResult.Write)));
endofwal = true;
break;
}
len = walrcv_receive(wrconn, &buf, &wait_fd);
}
/* Let the master know that we received some data. */
XLogWalRcvSendReply(false, false);
/*
 * If we've written some records, flush them to disk and
 * let the startup process and primary server know about
 * them.
 */
XLogWalRcvFlush(false);
}
/* Check if we need to exit the streaming loop. */
if (endofwal)
break;
/*
 * Ideally we would reuse a WaitEventSet object repeatedly
 * here to avoid the overheads of WaitLatchOrSocket on epoll
 * systems, but we can't be sure that libpq has the same
 * socket (even if the fd is the same number, it may have
 * been closed and reopened since the last time).  In future,
 * if there is a function for removing sockets from
 * WaitEventSet, then we could add and remove just the socket
 * each time, potentially avoiding some system calls.
 */
Assert(wait_fd != PGINVALID_SOCKET);
rc = WaitLatchOrSocket(walrcv->latch,
WL_POSTMASTER_DEATH | WL_SOCKET_READABLE |
WL_TIMEOUT | WL_LATCH_SET,
wait_fd,
NAPTIME_PER_CYCLE,
WAIT_EVENT_WAL_RECEIVER_MAIN);
if (rc & WL_LATCH_SET)
{
ResetLatch(walrcv->latch);
if (walrcv->force_reply)
{
/*
 * The recovery process has asked us to send apply
 * feedback now.  Make sure the flag is really set to
 * false in shared memory before sending the reply, so
 * we don't miss a new request for a reply.
 */
walrcv->force_reply = false;
pg_memory_barrier();
XLogWalRcvSendReply(true, false);
}
}
if (rc & WL_POSTMASTER_DEATH)
{
/*
 * Emergency bailout if postmaster has died.  This is to
 * avoid the necessity for manual cleanup of all
 * postmaster children.
 */
exit(1);
}
if (rc & WL_TIMEOUT)
{
/*
 * We didn't receive anything new. If we haven't heard
 * anything from the server for more than
 * wal_receiver_timeout / 2, ping the server. Also, if
 * it's been longer than wal_receiver_status_interval
 * since the last update we sent, send a status update to
 * the master anyway, to report any progress in applying
 * WAL.
 */
bool requestReply = false;
/*
 * Check if time since last receive from the primary has
 * reached the configured limit.
 */
if (wal_receiver_timeout > 0)
{
TimestampTz now = GetCurrentTimestamp();
TimestampTz timeout;
timeout =
TimestampTzPlusMilliseconds(last_recv_timestamp,
wal_receiver_timeout);
if (now >= timeout)
ereport(ERROR,
(errmsg("terminating walreceiver due to timeout")));
/*
 * We didn't receive anything new, for half of
 * receiver replication timeout. Ping the server.
 */
if (!ping_sent)
{
timeout = TimestampTzPlusMilliseconds(last_recv_timestamp,
(wal_receiver_timeout / 2));
if (now >= timeout)
{
requestReply = true;
ping_sent = true;
}
}
}
XLogWalRcvSendReply(requestReply, requestReply);
XLogWalRcvSendHSFeedback(false);
}
}
/*
 * The backend finished streaming. Exit streaming COPY-mode from
 * our side, too.
 */
EnableWalRcvImmediateExit();
walrcv_endstreaming(wrconn, &primaryTLI);
DisableWalRcvImmediateExit();
/*
 * If the server had switched to a new timeline that we didn't
 * know about when we began streaming, fetch its timeline history
 * file now.
 */
WalRcvFetchTimeLineHistoryFiles(startpointTLI, primaryTLI);
}
else
ereport(LOG,
(errmsg("primary server contains no more WAL on requested timeline %u",
startpointTLI)));
/*
 * End of WAL reached on the requested timeline. Close the last
 * segment, and await for new orders from the startup process.
 */
if (recvFile >= 0)
{
char xlogfname[MAXFNAMELEN];
XLogWalRcvFlush(false);
if (close(recvFile) != 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not close log segment %s: %m",
XLogFileNameP(recvFileTLI, recvSegNo))));
/*
 * Create .done file forcibly to prevent the streamed segment from
 * being archived later.  When archive mode is ARCHIVE_MODE_ALWAYS,
 * XLogArchiveNotify() is called for the segment instead.
 */
XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size);
if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS)
XLogArchiveForceDone(xlogfname);
else
XLogArchiveNotify(xlogfname);
}
/* Mark that no segment file is open, whether or not one was just closed */
recvFile = -1;
elog(DEBUG1, "walreceiver ended streaming and awaits new instructions");
WalRcvWaitForStartPosition(&startpoint, &startpointTLI);
}
/* not reached */
}