PostgreSQL - 源码 - walreceiver process

walreceiver 进程启动后会执行下面这个函数,它是 walreceiver 进程的主入口:

WalReceiverMain()

 

代码位置:

src/backend/replication/walreceiver.c

 

下面我们看一下主入口函数

 

/*
 * Main entry point for walreceiver process.
 *
 * This listing is an excerpt: parts elided by the article are marked "......".
 * The visible code proceeds as follows:
 *
 *   1. While holding walrcv->mutex, check the shared walRcvState, advertise
 *      our PID and latch, and copy out the connection string, slot name and
 *      start position/timeline.
 *   2. Register the exit callback, install signal handlers, load
 *      "libpqwalreceiver" and connect to the primary.
 *   3. Outer loop: identify the primary, compare system identifiers and
 *      timelines, fetch timeline history files and start streaming.
 *      Inner loop: receive and process messages, send replies/feedback and
 *      enforce wal_receiver_timeout until the end of WAL is reported.
 *      Afterwards, close the current segment file and wait for a new start
 *      position.
 *
 * The outer loop has no exit, so the end of the function is never reached.
 */

void

WalReceiverMain(void)

{

    ......

 

    /* Sanity check: walrcv should already have been set up. */

    Assert(walrcv != NULL);

 

    now = GetCurrentTimestamp();

 

    /*
     * Mark walreceiver as running in shared memory.
     *
     * Do this as early as possible, so that if we fail later on, we'll set
     * the state to STOPPED. If we die before this, the startup process will
     * keep waiting for us to start up.
     */

    SpinLockAcquire(&walrcv->mutex);

    Assert(walrcv->pid == 0);

    switch (walrcv->walRcvState)

    {

        case WALRCV_STOPPING:

            ......

        case WALRCV_STOPPED:

            ......

        case WALRCV_STARTING:

            ......

        case WALRCV_WAITING:

        case WALRCV_STREAMING:

        case WALRCV_RESTARTING:

        default:

            /* Shouldn't happen */

            SpinLockRelease(&walrcv->mutex);

            elog(PANIC, "walreceiver still running according to shared memory state");

    }

    /* Advertise our PID so that the startup process can kill us */

    walrcv->pid = MyProcPid;

    walrcv->walRcvState = WALRCV_STREAMING;

 

    /* Fetch information required to start streaming */

    walrcv->ready_to_display = false;

    strlcpy(conninfo, (char *) walrcv->conninfo, MAXCONNINFO);

    strlcpy(slotname, (char *) walrcv->slotname, NAMEDATALEN);

    startpoint = walrcv->receiveStart;

    startpointTLI = walrcv->receiveStartTLI;

 

    /* Initialise the timestamps to a sane value (the current time) */

    walrcv->lastMsgSendTime =

        walrcv->lastMsgReceiptTime = walrcv->latestWalEndTime = now;

 

    /* Advertise the latch that should be used to wake up this process */

    walrcv->latch = &MyProc->procLatch;

 

    SpinLockRelease(&walrcv->mutex);

 

    /* Arrange to clean up at walreceiver exit */

    on_shmem_exit(WalRcvDie, 0);

 

    /* Properly accept or ignore signals the postmaster might send us */

    pqsignal(SIGHUP, WalRcvSigHupHandler);  /* set flag to read config file */

    pqsignal(SIGINT, SIG_IGN);

    pqsignal(SIGTERM, WalRcvShutdownHandler);   /* request shutdown */

    pqsignal(SIGQUIT, WalRcvQuickDieHandler);   /* hard crash time */

    pqsignal(SIGALRM, SIG_IGN);

    pqsignal(SIGPIPE, SIG_IGN);

    pqsignal(SIGUSR1, WalRcvSigUsr1Handler);

    pqsignal(SIGUSR2, SIG_IGN);

 

    /* Reset some signals that are accepted by postmaster but not here */

    pqsignal(SIGCHLD, SIG_DFL);

    pqsignal(SIGTTIN, SIG_DFL);

    pqsignal(SIGTTOU, SIG_DFL);

    pqsignal(SIGCONT, SIG_DFL);

    pqsignal(SIGWINCH, SIG_DFL);

 

    /* We allow SIGQUIT (quickdie) at all times */

    sigdelset(&BlockSig, SIGQUIT);

 

    /* Load the libpq-specific functions */

    load_file("libpqwalreceiver", false);

    if (WalReceiverFunctions == NULL)

        elog(ERROR, "libpqwalreceiver didn't initialize correctly");

 

    /*
     * Create a resource owner to keep track of our resources (not clear that
     * we need this, but may as well have one).
     */

    CurrentResourceOwner = ResourceOwnerCreate(NULL, "Wal Receiver");

 

    /* Unblock signals (they were blocked when the postmaster forked us) */

    PG_SETMASK(&UnBlockSig);

 

    /* Establish the connection to the primary for XLOG streaming */

    EnableWalRcvImmediateExit();

    wrconn = walrcv_connect(conninfo, false, "walreceiver", &err);

    if (!wrconn)

        ereport(ERROR,

                (errmsg("could not connect to the primary server: %s", err)));

    DisableWalRcvImmediateExit();

 

    /*
     * Save the user-visible connection string. This clobbers the original
     * conninfo, for security. Also save the host and port of the sender
     * server this walreceiver is connected to.
     */

    tmp_conninfo = walrcv_get_conninfo(wrconn);

    walrcv_get_senderinfo(wrconn, &sender_host, &sender_port);

    SpinLockAcquire(&walrcv->mutex);

    memset(walrcv->conninfo, 0, MAXCONNINFO);

    if (tmp_conninfo)

        strlcpy((char *) walrcv->conninfo, tmp_conninfo, MAXCONNINFO);

 

    memset(walrcv->sender_host, 0, NI_MAXHOST);

    if (sender_host)

        strlcpy((char *) walrcv->sender_host, sender_host, NI_MAXHOST);

 

    walrcv->sender_port = sender_port;

    walrcv->ready_to_display = true;

    SpinLockRelease(&walrcv->mutex);

 

    if (tmp_conninfo)

        pfree(tmp_conninfo);

 

    if (sender_host)

        pfree(sender_host);

 

    first_stream = true;

    for (;;)

    {

        char     *primary_sysid;

        char        standby_sysid[32];

        int         server_version;

        WalRcvStreamOptions options;

 

        /*
         * Check that we're connected to a valid server using the
         * IDENTIFY_SYSTEM replication command.
         */

        EnableWalRcvImmediateExit();

        primary_sysid = walrcv_identify_system(wrconn, &primaryTLI,

                                             &server_version);

 

        snprintf(standby_sysid, sizeof(standby_sysid), UINT64_FORMAT,

                 GetSystemIdentifier());

        if (strcmp(primary_sysid, standby_sysid) != 0)

        {

            ereport(ERROR,

                    (errmsg("database system identifier differs between the primary and standby"),

                     errdetail("The primary's identifier is %s, the standby's identifier is %s.",

                             primary_sysid, standby_sysid)));

        }

        DisableWalRcvImmediateExit();

 

        /*
         * Confirm that the current timeline of the primary is the same as
         * or ahead of ours.
         */

        if (primaryTLI < startpointTLI)

            ereport(ERROR,

                    (errmsg("highest timeline %u of the primary is behind recovery timeline %u",

                            primaryTLI, startpointTLI)));

 

        /*
         * Get any missing history files. We do this always, even when we're
         * not interested in that timeline, so that if we're promoted to
         * become the master later on, we don't select the same timeline that
         * was already used in the current master. This isn't bullet-proof -
         * you'll need some external software to manage your cluster if you
         * need to ensure that a unique timeline id is chosen in every case,
         * but let's avoid the confusion of timeline id collisions where we
         * can.
         */

        WalRcvFetchTimeLineHistoryFiles(startpointTLI, primaryTLI);

 

        /*
         * Start streaming.
         *
         * We'll try to start at the requested starting point and timeline,
         * even if it's different from the server's latest timeline. In case
         * we've already reached the end of the old timeline, the server will
         * finish the streaming immediately, and we will go back to await
         * orders from the startup process. If recovery_target_timeline is
         * 'latest', the startup process will scan pg_wal and find the new
         * history file, bump recovery target timeline, and ask us to restart
         * on the new timeline.
         */

        options.logical = false;

        options.startpoint = startpoint;

        options.slotname = slotname[0] != '\0' ? slotname : NULL;

        options.proto.physical.startpointTLI = startpointTLI;

        ThisTimeLineID = startpointTLI;

        if (walrcv_startstreaming(wrconn, &options))

        {

            if (first_stream)

                ereport(LOG,

                        (errmsg("started streaming WAL from primary at %X/%X on timeline %u",

                                (uint32) (startpoint >> 32), (uint32) startpoint,

                                startpointTLI)));

            else

                ereport(LOG,

                        (errmsg("restarted WAL streaming at %X/%X on timeline %u",

                                (uint32) (startpoint >> 32), (uint32) startpoint,

                                startpointTLI)));

            first_stream = false;

 

            /* Initialize LogstreamResult and buffers for processing messages */

            LogstreamResult.Write = LogstreamResult.Flush = GetXLogReplayRecPtr(NULL);

            initStringInfo(&reply_message);

            initStringInfo(&incoming_message);

 

            /* Initialize the last recv timestamp */

            last_recv_timestamp = GetCurrentTimestamp();

            ping_sent = false;

 

            /* Loop until end-of-streaming or error */

            for (;;)

            {

                char     *buf;

                int         len;

                bool        endofwal = false;

                pgsocket    wait_fd = PGINVALID_SOCKET;

                int         rc;

 

                /*
                 * Exit walreceiver if we're not in recovery. This should not
                 * happen, but cross-check the status here.
                 */

                if (!RecoveryInProgress())

                    ereport(FATAL,

                            (errmsg("cannot continue WAL streaming, recovery has already ended")));

 

                /* Process any requests or signals received recently */

                ProcessWalRcvInterrupts();

 

                if (got_SIGHUP)

                {

                    got_SIGHUP = false;

                    ProcessConfigFile(PGC_SIGHUP);

                    XLogWalRcvSendHSFeedback(true);

                }

 

                /* See if we can read data immediately */

                len = walrcv_receive(wrconn, &buf, &wait_fd);

                if (len != 0)

                {

                    /*
                     * Process the received data, and any subsequent data we
                     * can read without blocking.
                     */

                    for (;;)

                    {

                        if (len > 0)

                        {

                            /*
                             * Something was received from master, so reset
                             * timeout
                             */

                            last_recv_timestamp = GetCurrentTimestamp();

                            ping_sent = false;

                            XLogWalRcvProcessMsg(buf[0], &buf[1], len - 1);

                        }

                        else if (len == 0)

                            break;

                        else if (len < 0)

                        {

                            ereport(LOG,

                                    (errmsg("replication terminated by primary server"),

                                     errdetail("End of WAL reached on timeline %u at %X/%X.",

                                             startpointTLI,

                                             (uint32) (LogstreamResult.Write >> 32), (uint32) LogstreamResult.Write)));

                            endofwal = true;

                            break;

                        }

                        len = walrcv_receive(wrconn, &buf, &wait_fd);

                    }

 

                    /* Let the master know that we received some data. */

                    XLogWalRcvSendReply(false, false);

 

                    /*
                     * If we've written some records, flush them to disk and
                     * let the startup process and primary server know about
                     * them.
                     */

                    XLogWalRcvFlush(false);

                }

 

                /* Check if we need to exit the streaming loop. */

                if (endofwal)

                    break;

 

                /*
                 * Ideally we would reuse a WaitEventSet object repeatedly
                 * here to avoid the overheads of WaitLatchOrSocket on epoll
                 * systems, but we can't be sure that libpq has the same
                 * socket (even if the fd is the same number, it may have
                 * been closed and reopened since the last time). In future,
                 * if there is a function for removing sockets from
                 * WaitEventSet, then we could add and remove just the socket
                 * each time, potentially avoiding some system calls.
                 */

                Assert(wait_fd != PGINVALID_SOCKET);

                rc = WaitLatchOrSocket(walrcv->latch,

                                     WL_POSTMASTER_DEATH | WL_SOCKET_READABLE |

                                     WL_TIMEOUT | WL_LATCH_SET,

                                     wait_fd,

                                     NAPTIME_PER_CYCLE,

                                     WAIT_EVENT_WAL_RECEIVER_MAIN);

                if (rc & WL_LATCH_SET)

                {

                    ResetLatch(walrcv->latch);

                    if (walrcv->force_reply)

                    {

                        /*
                         * The recovery process has asked us to send apply
                         * feedback now. Make sure the flag is really set to
                         * false in shared memory before sending the reply,
                         * so we don't miss a new request for a reply.
                         */

                        walrcv->force_reply = false;

                        pg_memory_barrier();

                        XLogWalRcvSendReply(true, false);

                    }

                }

                if (rc & WL_POSTMASTER_DEATH)

                {

                    /*
                     * Emergency bailout if postmaster has died. This is to
                     * avoid the necessity for manual cleanup of all
                     * postmaster children.
                     */

                    exit(1);

                }

                if (rc & WL_TIMEOUT)

                {

                    /*
                     * We didn't receive anything new. If we haven't heard
                     * anything from the server for more than
                     * wal_receiver_timeout / 2, ping the server. Also, if
                     * it's been longer than wal_receiver_status_interval
                     * since the last update we sent, send a status update to
                     * the master anyway, to report any progress in applying
                     * WAL.
                     */

                    bool        requestReply = false;

 

                    /*

                     * Check if time since the last receive from the primary

                     * has reached the configured limit.

                     */

                    if (wal_receiver_timeout > 0)

                    {

                        TimestampTz now = GetCurrentTimestamp();

                        TimestampTz timeout;

 

                        timeout =

                            TimestampTzPlusMilliseconds(last_recv_timestamp,

                                                        wal_receiver_timeout);

 

                        if (now >= timeout)

                            ereport(ERROR,

                                    (errmsg("terminating walreceiver due to timeout")));

 

                        /*
                         * We didn't receive anything new for half of the
                         * receiver replication timeout. Ping the server.
                         */

                        if (!ping_sent)

                        {

                            timeout = TimestampTzPlusMilliseconds(last_recv_timestamp,

                                                                 (wal_receiver_timeout / 2));

                            if (now >= timeout)

                            {

                                requestReply = true;

                                ping_sent = true;

                            }

                        }

                    }

 

                    XLogWalRcvSendReply(requestReply, requestReply);

                    XLogWalRcvSendHSFeedback(false);

                }

            }

 

            /*
             * The backend finished streaming. Exit streaming COPY-mode from
             * our side, too.
             */

            EnableWalRcvImmediateExit();

            walrcv_endstreaming(wrconn, &primaryTLI);

            DisableWalRcvImmediateExit();

 

            /*
             * If the server had switched to a new timeline that we didn't
             * know about when we began streaming, fetch its timeline history
             * file now.
             */

            WalRcvFetchTimeLineHistoryFiles(startpointTLI, primaryTLI);

        }

        else

            ereport(LOG,

                    (errmsg("primary server contains no more WAL on requested timeline %u",

                            startpointTLI)));

 

        /*
         * End of WAL reached on the requested timeline. Close the last
         * segment, and await new orders from the startup process.
         */

        if (recvFile >= 0)

        {

            char        xlogfname[MAXFNAMELEN];

 

            XLogWalRcvFlush(false);

            if (close(recvFile) != 0)

                ereport(PANIC,

                        (errcode_for_file_access(),

                         errmsg("could not close log segment %s: %m",

                                XLogFileNameP(recvFileTLI, recvSegNo))));

 

            /*
             * Create .done file forcibly to prevent the streamed segment
             * from being archived later.
             */

            XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size);

            if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS)

                XLogArchiveForceDone(xlogfname);

            else

                XLogArchiveNotify(xlogfname);

        }

        recvFile = -1;

 

        elog(DEBUG1, "walreceiver ended streaming and awaits new instructions");

        WalRcvWaitForStartPosition(&startpoint, &startpointTLI);

    }

    /* not reached */

}

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值