Active Namenode
NamenodeRpcServer.rollingUpgrade
try {
checkOperation(OperationCategory.WRITE);
if (isRollingUpgrade()) {
return rollingUpgradeInfo;
}
long startTime = now();
if (!haEnabled) { // for non-HA, we require NN to be in safemode
startRollingUpgradeInternalForNonHA(startTime);
} else { // for HA, NN cannot be in safemode
checkNameNodeSafeMode("Failed to start rolling upgrade");
startRollingUpgradeInternal(startTime);
}
getEditLog().logStartRollingUpgrade(rollingUpgradeInfo.getStartTime());
if (haEnabled) {
// roll the edit log to make sure the standby NameNode can tail
getFSImage().rollEditLog();
}
} finally {
writeUnlock(operationName);
}
getEditLog().logSync();
FSNamesystem.startRollingUpgradeInternal
/**
* Update internal state to indicate that a rolling upgrade is in progress.
* @param startTime rolling upgrade start time
*/
void startRollingUpgradeInternal(long startTime)
throws IOException {
checkRollingUpgrade("start rolling upgrade");
getFSImage().checkUpgrade(); // Check if upgrade is in progress.
setRollingUpgradeInfo(false, startTime);
}
FSNamesystem.setRollingUpgradeInfo
void setRollingUpgradeInfo(boolean createdRollbackImages, long startTime) {
rollingUpgradeInfo = new RollingUpgradeInfo(blockPoolId,
createdRollbackImages, startTime, 0L);
}
Standby Namenode
Stand by namenode monitors the edit log, when apply OP_ROLLING_UPGRADE_START
event, it calls fsNamesys.startRollingUpgradeInternal(startTime);
and fsNamesys.triggerRollbackCheckpoint();
- FSEditLogLoader.applyEditLogOp
case OP_ROLLING_UPGRADE_START: {
if (startOpt == StartupOption.ROLLINGUPGRADE) {
final RollingUpgradeStartupOption rollingUpgradeOpt
= startOpt.getRollingUpgradeStartupOption();
if (rollingUpgradeOpt == RollingUpgradeStartupOption.ROLLBACK) {
throw new RollingUpgradeOp.RollbackException();
} else if (rollingUpgradeOpt == RollingUpgradeStartupOption.DOWNGRADE) {
//ignore upgrade marker
break;
}
}
// start rolling upgrade
final long startTime = ((RollingUpgradeOp) op).getTime();
fsNamesys.startRollingUpgradeInternal(startTime);
fsNamesys.triggerRollbackCheckpoint();
break;
- startRollingUpgradeInternal sets rolling upgrade info
- triggerRollbackCheckpoint interrupts the CheckpointerThread, it will trigger a interrupt exception in CheckpointerThread, so let CheckpointerThread start work immediately from all kinds of sleep or wait.
CheckpointerThread ignores the interrupt.
private void doWork() {
final long checkPeriod = 1000 * checkpointConf.getCheckPeriod();
// Reset checkpoint time so that we don't always checkpoint
// on startup.
lastCheckpointTime = monotonicNow();
while (shouldRun) {
boolean needRollbackCheckpoint = namesystem.isNeedRollbackFsImage();
if (!needRollbackCheckpoint) {
try {
Thread.sleep(checkPeriod);
} catch (InterruptedException ie) {
}
if (!shouldRun) {
break;
}
}
try {
// We may have lost our ticket since last checkpoint, log in again, just in case
if (UserGroupInformation.isSecurityEnabled()) {
UserGroupInformation.getCurrentUser().checkTGTAndReloginFromKeytab();
}
final long now = monotonicNow();
final long uncheckpointed = countUncheckpointedTxns();
final long secsSinceLast = (now - lastCheckpointTime) / 1000;
boolean needCheckpoint = needRollbackCheckpoint;
if (needCheckpoint) {
LOG.info("Triggering a rollback fsimage for rolling upgrade.");
} else if (uncheckpointed >= checkpointConf.getTxnCount()) {
LOG.info("Triggering checkpoint because there have been " +
uncheckpointed + " txns since the last checkpoint, which " +
"exceeds the configured threshold " +
checkpointConf.getTxnCount());
needCheckpoint = true;
} else if (secsSinceLast >= checkpointConf.getPeriod()) {
LOG.info("Triggering checkpoint because it has been " +
secsSinceLast + " seconds since the last checkpoint, which " +
"exceeds the configured interval " + checkpointConf.getPeriod());
needCheckpoint = true;
}
synchronized (cancelLock) {
if (now < preventCheckpointsUntil) {
LOG.info("But skipping this checkpoint since we are about to failover!");
canceledCount++;
continue;
}
assert canceler == null;
canceler = new Canceler();
}
if (needCheckpoint) {
doCheckpoint();
// reset needRollbackCheckpoint to false only when we finish a ckpt
// for rollback image
if (needRollbackCheckpoint
&& namesystem.getFSImage().hasRollbackFSImage()) {
namesystem.setCreatedRollbackImages(true);
namesystem.setNeedRollbackFsImage(false);
}
lastCheckpointTime = now;
}
} catch (SaveNamespaceCancelledException ce) {
LOG.info("Checkpoint was cancelled: " + ce.getMessage());
canceledCount++;
} catch (InterruptedException ie) {
LOG.info("Interrupted during checkpointing", ie);
// Probably requested shutdown.
continue;
} catch (Throwable t) {
LOG.error("Exception in doCheckpoint", t);
} finally {
synchronized (cancelLock) {
canceler = null;
}
}
}
}
The standby namenode will generate roll back image and transfer it to active namenode.
Datanode handles for rollingUpgradeInfo.
The data node send heart beat periodically. NamenodeRpcServer.handleHeartbeat sends rollingUpgradeInfo to data node, and datanode will generate the preversion for upgrade.
NamenodeRpcServer.handleHeartbeat
HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg,
StorageReport[] reports, long cacheCapacity, long cacheUsed,
int xceiverCount, int xmitsInProgress, int failedVolumes,
VolumeFailureSummary volumeFailureSummary) throws IOException {
readLock();
try {
//get datanode commands
final int maxTransfer = blockManager.getMaxReplicationStreams()
- xmitsInProgress;
DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat(
nodeReg, reports, blockPoolId, cacheCapacity, cacheUsed,
xceiverCount, maxTransfer, failedVolumes, volumeFailureSummary);
//create ha status
final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat(
haContext.getState().getServiceState(),
getFSImage().getCorrectLastAppliedOrWrittenTxId());
return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo);
} finally {
readUnlock("handleHeartbeat");
}
}
After execute hdfs dfsadmin - -rollingUpgrade prepare
, the namenode show the following contents.
Rolling upgrade started at 2018/8/13 下午4:50:22.
Rollback image has not been created.
The namenode show the following contents after the rollback image has been created.
Rolling upgrade started at 2018/8/13 下午4:50:22.
Rollback image has been created. Proceed to upgrade daemons.
Finalize upgrade
Command:
hdfs dfsadmin -finalizeUpgrade
Active namenode
FSNamesystem.finalizeRollingUpgrade just set finalizeTime and log the event.
RollingUpgradeInfo finalizeRollingUpgrade() throws IOException {
final String operationName = "finalizeRollingUpgrade";
checkSuperuserPrivilege();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
if (!isRollingUpgrade()) {
return null;
}
checkNameNodeSafeMode("Failed to finalize rolling upgrade");
finalizeRollingUpgradeInternal(now());
getEditLog().logFinalizeRollingUpgrade(rollingUpgradeInfo.getFinalizeTime());
if (haEnabled) {
// roll the edit log to make sure the standby NameNode can tail
getFSImage().rollEditLog();
}
getFSImage().updateStorageVersion();
getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
NameNodeFile.IMAGE);
} finally {
writeUnlock(operationName);
}
if (!haEnabled) {
// Sync not needed for ha since the edit was rolled after logging.
getEditLog().logSync();
}
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
logAuditEvent(true, operationName, null, null, null);
}
return rollingUpgradeInfo;
}
void finalizeRollingUpgradeInternal(long finalizeTime) {
// Set the finalize time
rollingUpgradeInfo.finalize(finalizeTime);
}
Standby namenode
- FSEditLogLoader.applyEditLogOp
case OP_ROLLING_UPGRADE_FINALIZE: {
final long finalizeTime = ((RollingUpgradeOp) op).getTime();
if (fsNamesys.isRollingUpgrade()) {
// Only do it when NN is actually doing rolling upgrade.
// We can get FINALIZE without corresponding START, if NN is restarted
// before this op is consumed and a new checkpoint is created.
fsNamesys.finalizeRollingUpgradeInternal(finalizeTime);
}
fsNamesys.getFSImage().updateStorageVersion();
fsNamesys.getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
NameNodeFile.IMAGE);
break;
}
after set the finalized time, the rollingUpgradeInfo.isFinalized()
returns true, and then isRollingUpgrade returns false.
public boolean isRollingUpgrade() {
return rollingUpgradeInfo != null && !rollingUpgradeInfo.isFinalized();
}