PostgreSQL启动恢复读取checkpoint记录失败的条件

PostgreSQL启动恢复读取checkpoint记录失败的条件

 
  1. 1、首先读取ControlFile->checkPoint指向的checkpoint

  2. 2、如果读取失败,slave直接abort退出,master再次读取ControlFile->prevCheckPoint指向的checkpoint

  3. StartupXLOG->

  4. |--checkPointLoc = ControlFile->checkPoint;

  5. |--record = ReadCheckpointRecord(xlogreader, checkPointLoc, 1, true);

  6. |-- if (record != NULL){

  7. ...

  8. }else if (StandbyMode){

  9. ereport(PANIC,(errmsg("could not locate a valid checkpoint record")));

  10. }else{

  11. checkPointLoc = ControlFile->prevCheckPoint;

  12. record = ReadCheckpointRecord(xlogreader, checkPointLoc, 2, true);

  13. if (record != NULL){

  14. InRecovery = true;//标记下面进入recovery

  15. }else{

  16. ereport(PANIC,(errmsg("could not locate a valid checkpoint record")));

  17. }

  18. }

  19. ...

一、那么什么条件下读取的checkpoint记录record==NULL?

 
  1. 1、ControlFile->checkPoint % XLOG_BLCKSZ < SizeOfXLogShortPHD

  2. 2、ReadRecord(xlogreader, ControlFile->checkPoint, LOG, true)返回NULL

  3. 3、ReadRecord读到的record!=NULL && record->xl_rmid != RM_XLOG_ID

  4. 4、ReadRecord读到的record!=NULL && info != XLOG_CHECKPOINT_SHUTDOWN && info != XLOG_CHECKPOINT_ONLINE

  5. 5、ReadRecord读到的record!=NULL && record->xl_tot_len != SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint)

二、ReadRecord函数返回NULL的条件 

 
  1. ReadRecord(xlogreader, ControlFile->checkPoint, LOG, true)

  2. |--record = XLogReadRecord(xlogreader, ControlFile->checkPoint, &errormsg);

  3. |-- 2.1 record==NULL && !StandbyMode

  4. |-- 2.2 record!=NULL && !tliInHistory(xlogreader->latestPageTLI, expectedTLEs)

  5. /*-----

  6. note:只要读取了一页xlog并且页头校验通过,xlogreader->latestPageTLI就会被赋值为该页第一个记录的时间线(即页头的xlp_tli)

  7. XLogReaderValidatePageHeader

  8. -->xlogreader->latestPageTLI=hdr->xlp_tli;

  9. ------*/

三、XLogReadRecord读取checkpoint返回NULL的条件?

 
  1. XLogReadRecord(xlogreader, ControlFile->checkPoint, &errormsg)

  2. targetPagePtr = ControlFile->checkPoint - (ControlFile->checkPoint % XLOG_BLCKSZ);

  3. targetRecOff = ControlFile->checkPoint % XLOG_BLCKSZ;

  4. readOff = ReadPageInternal(state,targetPagePtr, Min(targetRecOff + SizeOfXLogRecord, XLOG_BLCKSZ));

  5. pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) state->readBuf);

  6. record = (XLogRecord *) (state->readBuf + RecPtr % XLOG_BLCKSZ);

  7. total_len = record->xl_tot_len;

  8. -------------

  9. 1、readOff < 0

  10. 2、0< targetRecOff < pageHeaderSize

  11. 3、(((XLogPageHeader) state->readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) && targetRecOff == pageHeaderSize

  12. page头有跨页的record并且checkpoint定位的偏移正好在页头尾部

  13. 4、targetRecOff <= XLOG_BLCKSZ - SizeOfXLogRecord &&

  14. !ValidXLogRecordHeader(state, ControlFile->checkPoint, state->ReadRecPtr, record,randAccess)

  15. ---(record->xl_tot_len < SizeOfXLogRecord || record->xl_rmid > RM_MAX_ID || xl_prev校验失败:randAccess为true时(读取checkpoint属于这种情况)要求record->xl_prev < RecPtr,否则要求record->xl_prev == state->ReadRecPtr)

  16. 5、targetRecOff > XLOG_BLCKSZ - SizeOfXLogRecord && total_len < SizeOfXLogRecord

  17. 6、total_len > state->readRecordBufSize && !allocate_recordbuf(state, total_len)

  18. 一旦该记录损坏,total_len的值非常大的话,就需要allocate_recordbuf扩展state->readRecordBuf,可能因为分配失败而返回NULL

  19. 记录的checksum需要等待全部读取完整记录后才校验

  20. -------------

三、ReadPageInternal的返回值(readOff)小于0的条件

 
  1. ReadPageInternal(state,targetPagePtr, Min(targetRecOff + SizeOfXLogRecord, XLOG_BLCKSZ))

  2. 1、第一次read wal文件,readLen = state->read_page:读取第一页。readLen < 0

  3. 2、readLen>=0 && !XLogReaderValidatePageHeader(state, targetSegmentPtr, state->readBuf)

  4. --

  5. 3、读取checkpoint所在页readLen = state->read_page: readLen < 0

  6. 4、readLen >= 0 && readLen <= SizeOfXLogShortPHD

  7. 5、!XLogReaderValidatePageHeader(state, pageptr, (char *) hdr)

四、XLogPageRead何时返回值<0 ?

 
  1. /*

  2. 1、WaitForWALToBecomeAvailable open失败

  3. 2、lseek 失败 && !StandbyMode

  4. 3、read失败 && !StandbyMode

  5. 4、校验page头失败 && !StandbyMode

  6. 如果是StandbyMode,则会重新retry->WaitForWALToBecomeAvailable,切换日志源进行open

  7. */

  8. !WaitForWALToBecomeAvailable(targetPagePtr + reqLen,private->randAccess,1,targetRecPtr)//open

  9. |-- return -1

  10. readOff = targetPageOff;

  11. if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0){

  12. !StandbyMode:: return -1

  13. }

  14. if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ){

  15. !StandbyMode:: return -1

  16. }

  17. XLogReaderValidatePageHeader(xlogreader, targetPagePtr, readBuf)

  18. !StandbyMode:: return -1

五、WaitForWALToBecomeAvailable何时返回false?

 
  1. --XLOG_FROM_ARCHIVE | XLOG_FROM_PG_WAL

  2. 1、先XLogFileReadAnyTLI open日志:

  3. 1、遍历时间线列表里的每一个时间线,从最新的开始

  4. 2、当读取checkpoint的时候,source是XLOG_FROM_ANY

  5. 3、先找归档的日志进行open;如果open失败再找WAL日志进行open

  6. 4、如果都没有open成功,则向前找时间线,尝试open前一个时间线上segno(文件号)相同的文件

  7. 5、open成功后expectedTLEs被赋值为当前时间线列表的所有值

  8. 2、如果open失败,则切换日志源:XLOG_FROM_ARCHIVE | XLOG_FROM_PG_WAL -> XLOG_FROM_STREAM

  9. 3、切换日志源时,如果当前日志源是XLOG_FROM_ARCHIVE | XLOG_FROM_PG_WAL,则:

  10. slave && promote :return false

  11. !StandbyMode:return false

  12. --XLOG_FROM_STREAM

  13. 1、!WalRcvStreaming()即receiver进程挂了,切换日志源

  14. 2、CheckForStandbyTrigger()切换日志源

  15. 3、XLOG_FROM_STREAM->XLOG_FROM_ARCHIVE

六、代码流程:

 
  1. static XLogRecord * ReadCheckpointRecord(

  2. XLogReaderState *xlogreader,

  3. XLogRecPtr RecPtr,

  4. int whichChkpt,

  5. bool report

  6. )

  7. {

  8. //((RecPtr) % XLOG_BLCKSZ >= SizeOfXLogShortPHD)

  9. if (!XRecOffIsValid(RecPtr)){

  10. ...

  11. return NULL;

  12. }

  13.  
  14. record = ReadRecord(xlogreader, RecPtr, LOG, true);

  15.  
  16. if (record == NULL){

  17. ...

  18. return NULL;

  19. }

  20. if (record->xl_rmid != RM_XLOG_ID){

  21. ...

  22. return NULL;

  23. }

  24. info = record->xl_info & ~XLR_INFO_MASK;

  25. if (info != XLOG_CHECKPOINT_SHUTDOWN &&

  26. info != XLOG_CHECKPOINT_ONLINE){

  27. ...

  28. return NULL;

  29. }

  30. if (record->xl_tot_len != SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint)){

  31. ...

  32. return NULL;

  33. }

  34. return record;

  35. }

 
  1. static int ReadPageInternal(

  2. XLogReaderState *state,

  3. XLogRecPtr pageptr,

  4. int reqLen

  5. )

  6. {

  7.  
  8. XLByteToSeg(pageptr, targetSegNo);

  9. targetPageOff = (pageptr % XLogSegSize);

  10. /*

  11. 1、第一次read段文件,先read第一页并进行校验:readLen < 0 或 readLen >= 0 && 页头没校验通过

  12. */

  13. if (targetSegNo != state->readSegNo && targetPageOff != 0){

  14. XLogRecPtr targetSegmentPtr = pageptr - targetPageOff;

  15. readLen = state->read_page(state, targetSegmentPtr, XLOG_BLCKSZ,

  16. state->currRecPtr,

  17. state->readBuf, &state->readPageTLI);

  18. if (readLen < 0)

  19. goto err;

  20. if (!XLogReaderValidatePageHeader(state, targetSegmentPtr, state->readBuf))

  21. goto err;

  22. }

  23.  
  24. /*

  25. 2、read至少short page header大小:

  26. 1)readLen < 0

  27. 2)readLen <= SizeOfXLogShortPHD

  28. */

  29. readLen = state->read_page(state, pageptr, Max(reqLen, SizeOfXLogShortPHD),

  30. state->currRecPtr,

  31. state->readBuf, &state->readPageTLI);

  32. if (readLen < 0)

  33. goto err;

  34. if (readLen <= SizeOfXLogShortPHD)

  35. goto err;

  36.  
  37. hdr = (XLogPageHeader) state->readBuf;

  38.  
  39. /*

  40. 3、如果读取的长度不足整个页头,需要继续读取

  41. */

  42. if (readLen < XLogPageHeaderSize(hdr)){

  43. readLen = state->read_page(state, pageptr, XLogPageHeaderSize(hdr),

  44. state->currRecPtr,

  45. state->readBuf, &state->readPageTLI);

  46. if (readLen < 0)

  47. goto err;

  48. }

  49. /*

  50. 3)校验整个页头没有通过。校验通过会state->latestPageTLI = hdr->xlp_tli;

  51. */

  52. if (!XLogReaderValidatePageHeader(state, pageptr, (char *) hdr))

  53. goto err;

  54. //最后更新读取的状态

  55. state->readSegNo = targetSegNo;

  56. state->readOff = targetPageOff;

  57. state->readLen = readLen;

  58. return readLen;

  59.  
  60. err:

  61. XLogReaderInvalReadState(state);

  62. return -1;

  63. }

 
  1. static bool WaitForWALToBecomeAvailable(

  2. XLogRecPtr RecPtr,

  3. bool randAccess,

  4. bool fetching_ckpt,

  5. XLogRecPtr tliRecPtr

  6. )

  7. {

  8. /*

  9. 1、currentSource读取checkpoint时是0:非归档恢复(!InArchiveRecovery)时从XLOG_FROM_PG_WAL进行open,否则首先从XLOG_FROM_ARCHIVE进行open

  10. */

  11. if (!InArchiveRecovery)

  12. currentSource = XLOG_FROM_PG_WAL;

  13. else if (currentSource == 0)

  14. currentSource = XLOG_FROM_ARCHIVE;

  15.  
  16. for (;;)

  17. {

  18. int oldSource = currentSource;

  19. /*

  20. 2、切换日志源

  21. */

  22. if (lastSourceFailed){

  23. switch (currentSource){

  24. case XLOG_FROM_ARCHIVE:

  25. case XLOG_FROM_PG_WAL:

  26. if (StandbyMode && CheckForStandbyTrigger()){

  27. ShutdownWalRcv();

  28. return false;

  29. }

  30. /*只有在slave下才会切换*/

  31. if (!StandbyMode)

  32. return false;

  33. /*如果recovery.conf配置了连接master的信息,则计算流复制的起始位置和时间线,并启动receiver进程*/

  34. if (PrimaryConnInfo){//后续需要单独详细解析

  35. if (fetching_ckpt){//读取checkpoint

  36. ptr = RedoStartLSN;

  37. tli = ControlFile->checkPointCopy.ThisTimeLineID;

  38. }else{

  39. ptr = RecPtr;

  40. tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);

  41. }

  42. curFileTLI = tli;

  43. RequestXLogStreaming(tli, ptr, PrimaryConnInfo,PrimarySlotName);

  44. receivedUpto = 0;

  45. }

  46. currentSource = XLOG_FROM_STREAM;

  47. break;

  48.  
  49. case XLOG_FROM_STREAM:

  50. if (WalRcvStreaming())

  51. ShutdownWalRcv();

  52. if (recoveryTargetIsLatest){

  53. if (rescanLatestTimeLine()){

  54. currentSource = XLOG_FROM_ARCHIVE;

  55. break;

  56. }

  57. }

  58. now = GetCurrentTimestamp();

  59. if (!TimestampDifferenceExceeds(last_fail_time, now,

  60. wal_retrieve_retry_interval)){

  61. TimestampDifference(last_fail_time, now, &secs, &usecs);

  62. wait_time = wal_retrieve_retry_interval -

  63. (secs * 1000 + usecs / 1000);

  64. WaitLatch(&XLogCtl->recoveryWakeupLatch,

  65. WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,

  66. wait_time, WAIT_EVENT_RECOVERY_WAL_STREAM);

  67. ResetLatch(&XLogCtl->recoveryWakeupLatch);

  68. now = GetCurrentTimestamp();

  69. }

  70. last_fail_time = now;

  71. currentSource = XLOG_FROM_ARCHIVE;

  72. break;

  73.  
  74. default:

  75. elog(ERROR, "unexpected WAL source %d", currentSource);

  76. }

  77. }else if (currentSource == XLOG_FROM_PG_WAL){

  78. if (InArchiveRecovery)

  79. currentSource = XLOG_FROM_ARCHIVE;

  80. }

  81.  
  82. /*

  83. 3、从当前日志源尝试进行open(首次进入时直接走到这里)

  84. */

  85. lastSourceFailed = false;

  86. switch (currentSource)

  87. {

  88. case XLOG_FROM_ARCHIVE:

  89. case XLOG_FROM_PG_WAL:

  90. readFile = XLogFileReadAnyTLI(readSegNo, DEBUG2,

  91. currentSource == XLOG_FROM_ARCHIVE ? XLOG_FROM_ANY :

  92. currentSource);

  93. if (readFile >= 0)

  94. return true; /* success! */

  95. /*open失败,进入for循环切换日志源*/

  96. lastSourceFailed = true;

  97. break;

  98. case XLOG_FROM_STREAM:{

  99. if (!WalRcvStreaming()){

  100. lastSourceFailed = true;

  101. break;

  102. }

  103. if (RecPtr < receivedUpto)

  104. havedata = true;

  105. else{

  106. XLogRecPtr latestChunkStart;

  107. receivedUpto = GetWalRcvWriteRecPtr(&latestChunkStart, &receiveTLI);

  108. if (RecPtr < receivedUpto && receiveTLI == curFileTLI){

  109. havedata = true;

  110. if (latestChunkStart <= RecPtr){

  111. XLogReceiptTime = GetCurrentTimestamp();

  112. SetCurrentChunkStartTime(XLogReceiptTime);

  113. }

  114. }

  115. else

  116. havedata = false;

  117. }

  118. if (havedata){

  119. if (readFile < 0){

  120. if (!expectedTLEs)

  121. expectedTLEs = readTimeLineHistory(receiveTLI);

  122. readFile = XLogFileRead(readSegNo, PANIC,

  123. receiveTLI,

  124. XLOG_FROM_STREAM, false);

  125. }else{

  126. /* just make sure source info is correct... */

  127. readSource = XLOG_FROM_STREAM;

  128. XLogReceiptSource = XLOG_FROM_STREAM;

  129. return true;

  130. }

  131. break;

  132. }

  133. if (CheckForStandbyTrigger()){

  134. lastSourceFailed = true;

  135. break;

  136. }

  137. if (!streaming_reply_sent){

  138. WalRcvForceReply();

  139. streaming_reply_sent = true;

  140. }

  141.  
  142. WaitLatch(&XLogCtl->recoveryWakeupLatch,

  143. WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,

  144. 5000L, WAIT_EVENT_RECOVERY_WAL_ALL);

  145. ResetLatch(&XLogCtl->recoveryWakeupLatch);

  146. break;

  147. }

  148.  
  149. default:

  150. elog(ERROR, "unexpected WAL source %d", currentSource);

  151. }

  152. HandleStartupProcInterrupts();

  153. }

  154.  
  155. return false; /* not reached */

  156. }

 
  1. static int

  2. XLogFileReadAnyTLI(XLogSegNo segno, int emode, int source)

  3. {

  4.  
  5. if (expectedTLEs)

  6. tles = expectedTLEs;

  7. else

  8. tles = readTimeLineHistory(recoveryTargetTLI);

  9. /*

  10. 1、遍历时间线列表里的每一个时间线,从最新的开始

  11. 2、当读取checkpoint的时候,source是XLOG_FROM_ANY

  12. 3、先找归档的日志进行open;如果open失败再找WAL日志进行open

  13. 4、如果都没有open成功,则向前找时间线,尝试open前一个时间线上segno(文件号)相同的文件

  14. 5、open成功后expectedTLEs被赋值为当前时间线列表的所有值

  15. */

  16. foreach(cell, tles){

  17. TimeLineID tli = ((TimeLineHistoryEntry *) lfirst(cell))->tli;

  18.  
  19. if (tli < curFileTLI)

  20. break; /* don't bother looking at too-old TLIs */

  21.  
  22. if (source == XLOG_FROM_ANY || source == XLOG_FROM_ARCHIVE){

  23. fd = XLogFileRead(segno, emode, tli,XLOG_FROM_ARCHIVE, true);

  24. if (fd != -1){

  25. elog(DEBUG1, "got WAL segment from archive");

  26. if (!expectedTLEs)

  27. expectedTLEs = tles;

  28. return fd;

  29. }

  30. }

  31.  
  32. if (source == XLOG_FROM_ANY || source == XLOG_FROM_PG_WAL)

  33. {

  34. fd = XLogFileRead(segno, emode, tli,XLOG_FROM_PG_WAL, true);

  35. if (fd != -1){

  36. if (!expectedTLEs)

  37. expectedTLEs = tles;

  38. return fd;

  39. }

  40. }

  41. }

  42. return -1;

  43. }

  44.  
  45. static int

  46. XLogFileRead(

  47. XLogSegNo segno, /*IN:wal文件号*/

  48. int emode, /*IN:log日志级别*/

  49. TimeLineID tli, /*IN:时间线*/

  50. int source, /*IN:XLOG_FROM_ARCHIVE or XLOG_FROM_PG_WAL or XLOG_FROM_STREAM*/

  51. bool notfoundOk /*IN:XLOG_FROM_ARCHIVE or XLOG_FROM_PG_WAL时为TRUE,XLOG_FROM_STREAM:false*/

  52. )

  53. {

  54. //通过tli、segno拼成日志文件名

  55. XLogFileName(xlogfname, tli, segno);

  56.  
  57. switch (source){

  58. case XLOG_FROM_ARCHIVE:

  59. //InRedo:开始apply redo时为TRUE,结束则false

  60. restoredFromArchive = RestoreArchivedFile(path, xlogfname, "RECOVERYXLOG", XLogSegSize,InRedo);

  61. if (!restoredFromArchive)

  62. return -1;

  63. break;

  64.  
  65. case XLOG_FROM_PG_WAL:

  66. case XLOG_FROM_STREAM:

  67. //路径+wal 文件

  68. XLogFilePath(path, tli, segno);

  69. restoredFromArchive = false;

  70. break;

  71.  
  72. default:

  73. elog(ERROR, "invalid XLogFileRead source %d", source);

  74. }

  75.  
  76. /*

  77. * If the segment was fetched from archival storage, replace the existing

  78. * xlog segment (if any) with the archival version.

  79. */

  80. if (source == XLOG_FROM_ARCHIVE)

  81. {

  82. KeepFileRestoredFromArchive(path, xlogfname);

  83.  
  84. /*

  85. * Set path to point at the new file in pg_wal.

  86. */

  87. snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);

  88. }

  89. //open

  90. fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);

  91. if (fd >= 0)

  92. {

  93. /* Success! */

  94. curFileTLI = tli;

  95.  
  96.  
  97. /* Track source of data in assorted state variables */

  98. readSource = source;

  99. XLogReceiptSource = source;

  100. /* In FROM_STREAM case, caller tracks receipt time, not me */

  101. if (source != XLOG_FROM_STREAM)

  102. XLogReceiptTime = GetCurrentTimestamp();

  103.  
  104. return fd;

  105. }

  106. if (errno != ENOENT || !notfoundOk) /* unexpected failure? */

  107. ereport(PANIC,

  108. (errcode_for_file_access(),

  109. errmsg("could not open file \"%s\": %m", path)));

  110. return -1;

  111.  
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值