List of articles
一.前言
ZKFailoverController作为主备namenode的控制器,负责对namenode进行监控和主备切换,目前这个切换是依赖于zookeeper的,当然目前也支持手工切换主备namenode。
二.启动入口
启动入口其实就是一个main函数,也就是说ZKFailoverController是一个独立运行的Java进程。
实现类是DFSZKFailoverController
/**
 * Command-line entry point of the HDFS ZK failover controller.
 *
 * <p>Builds a {@code DFSZKFailoverController} from the parsed configuration,
 * runs it with the remaining arguments, and exits the JVM with the status
 * returned by {@code run}. Any {@code Throwable} raised while creating or
 * running the controller is logged and handed to {@code terminate(1, t)}.
 *
 * @param args raw command-line arguments
 * @throws Exception if start-up fails before the guarded section is entered
 */
public static void main(String args[])
    throws Exception {
  StringUtils.startupShutdownMessage(DFSZKFailoverController.class,
      args, LOG);
  // When parseHelpArgument reports true (presumably a help flag was
  // handled and the usage text printed), there is nothing left to do.
  if (DFSUtil.parseHelpArgument(args,
      ZKFailoverController.USAGE, System.out, true)) {
    System.exit(0);
  }
  GenericOptionsParser parser = new GenericOptionsParser(
      new HdfsConfiguration(), args);
  try {
    // Build the controller from the parsed configuration.
    DFSZKFailoverController zkfc =
        DFSZKFailoverController.create(parser.getConfiguration());
    // Run it; the return value of run() becomes the process exit status.
    System.exit(zkfc.run(parser.getRemainingArgs()));
  } catch (Throwable t) {
    // Pass the throwable as a separate argument so the logger records the
    // full stack trace; concatenating it into the message keeps only
    // t.toString().
    LOG.error("DFSZKFailOverController exiting due to earlier exception ", t);
    terminate(1, t);
  }
}
三. doRun
doRun里面有下面五个步骤比较重要:
3.1.initZK 初始化ZK,构建连接信息
/**
 * Reads the ZooKeeper-related settings from {@code conf}, validates them,
 * and creates the {@code ActiveStandbyElector} stored in {@code elector}.
 *
 * @throws HadoopIllegalArgumentException declared by the original signature
 * @throws IOException declared by the original signature
 * @throws KeeperException declared by the original signature
 */
private void initZK() throws HadoopIllegalArgumentException, IOException,
    KeeperException {
  // ZooKeeper quorum; validated for presence further down.
  zkQuorum = conf.get(ZK_QUORUM_KEY);

  // Session timeout, falling back to ZK_SESSION_TIMEOUT_DEFAULT.
  // NOTE(review): the original note gave "ha.zookeeper.session-timeout.ms :
  // 10ms"; the unit looks wrong -- confirm against the constant.
  final int sessionTimeout = conf.getInt(ZK_SESSION_TIMEOUT_KEY,
      ZK_SESSION_TIMEOUT_DEFAULT);

  // ACLs for the znodes (original note: ha.zookeeper.acl, default
  // world:anyone:rwcda). The configured value first goes through
  // ZKUtil.resolveConfIndirection, then is parsed; an empty result falls
  // back to CREATOR_ALL_ACL.
  final String aclSpec = ZKUtil.resolveConfIndirection(
      conf.get(ZK_ACL_KEY, ZK_ACL_DEFAULT));
  final List<ACL> parsedAcls = ZKUtil.parseACLs(aclSpec);
  final List<ACL> effectiveAcls =
      parsedAcls.isEmpty() ? Ids.CREATOR_ALL_ACL : parsedAcls;

  // Authentication entries taken from the configuration.
  final List<ZKAuthInfo> authInfos =
      SecurityUtil.getZKAuthInfos(conf, ZK_AUTH_KEY);

  // Sanity checks: a quorum must be configured and the timeout positive.
  Preconditions.checkArgument(zkQuorum != null,
      "Missing required configuration '%s' for ZooKeeper quorum",
      ZK_QUORUM_KEY);
  Preconditions.checkArgument(sessionTimeout > 0,
      "Invalid ZK session timeout %s", sessionTimeout);

  // Retry budget for elector ZK operations (original note:
  // ha.failover-controller.active-standby-elector.zk.op.retries, default 3).
  final int opRetries = conf.getInt(
      CommonConfigurationKeys.HA_FC_ELECTOR_ZK_OP_RETRIES_KEY,
      CommonConfigurationKeys.HA_FC_ELECTOR_ZK_OP_RETRIES_DEFAULT);

  // Parent znode per the original note: /hadoop-ha/[namenode serviceId].
  elector = new ActiveStandbyElector(
      zkQuorum,
      sessionTimeout,
      getParentZnode(),
      effectiveAcls,
      authInfos,
      new ElectorCallbacks(),
      opRetries);
}
3.2.formatZK(force, interactive);
格式化zk,其实就是构建工作空间.
默认工作空间为: **/hadoop-ha/[nameservice id]**
/**
 * Prepares the parent znode used by the elector.
 *
 * <p>If the parent znode already exists it is cleared first, but only when
 * {@code force} is set or, in interactive mode, {@code confirmFormat()}
 * returns true. Afterwards the parent znode is (re)created via
 * {@code ensureParentZNode()}.
 *
 * @param force clear an existing parent znode without asking
 * @param interactive whether {@code confirmFormat()} may be consulted
 * @return 0 on success, {@code ERR_CODE_FORMAT_DENIED} when clearing was
 *         not permitted, 1 when clearing failed with an IOException
 */
private int formatZK(boolean force, boolean interactive)
    throws IOException, InterruptedException, KeeperException {
  if (elector.parentZNodeExists()) {
    // confirmFormat() is only consulted when not forced and interactive.
    final boolean mayClear = force || (interactive && confirmFormat());
    if (!mayClear) {
      return ERR_CODE_FORMAT_DENIED;
    }
    try {
      elector.clearParentZNode();
    } catch (IOException clearFailure) {
      LOG.error("Unable to clear zk parent znode", clearFailure);
      return 1;
    }
  }

  elector.ensureParentZNode();
  return 0;
}
3.3. initRPC() 初始化ZKFCRpcServer
/**
 * Creates the {@code ZKFCRpcServer} for this controller and stores it in
 * {@code rpcServer}. The server is only constructed here, not started.
 *
 * @throws IOException declared by the original signature
 */
protected void initRPC() throws IOException {
  // Resolve the bind address first, then construct the server around it.
  final InetSocketAddress listenAddress = getRpcAddressToBindTo();
  rpcServer = new ZKFCRpcServer(
      conf, listenAddress, this, getPolicyProvider());
}
3.4. initHM 开启健康检查 HealthMonitor
健康检查是一个定时线程,会定时检查所属namenode的健康状态。获取namenode的状态之后,如果健康的话,会尝试获取zk锁;如果拿到zk锁,该zkfc所在的namenode会自动进入active状态,否则依旧是standby状态。
/**
 * Creates the {@code HealthMonitor} for {@code localTarget}, registers the
 * health and service-state callbacks on it, and starts it.
 */
private void initHM() {
  final HealthMonitor monitor = new HealthMonitor(conf, localTarget);
  // Publish to the field before wiring callbacks, as the original did.
  healthMonitor = monitor;
  monitor.addCallback(new HealthCallbacks());
  monitor.addServiceStateCallback(new ServiceStateCallBacks());
  // Callbacks are registered before the monitor is started.
  monitor.start();
}
3.5. 启动ZKFCRpcServer
// Call-site snippet: invokes startRPC(), defined just below.
// NOTE(review): presumably issued from doRun after initRPC() -- confirm.
startRPC();
/**
 * Starts the RPC server held in {@code rpcServer}.
 *
 * @throws IOException declared by the original signature
 */
protected void startRPC() throws IOException {
  this.rpcServer.start();
}
四. HealthMonitor
健康检查(HealthMonitor)是一个线程,会定时与namenode进行通讯,然后根据反馈的namenode状态进行不同的操作. 默认检查周期 1秒 ( ha.health-monitor.check-interval.ms : 1000 )
核心的方法为: recheckElectability
/**
 * Re-evaluates, from the most recently recorded health state, whether the
 * local service should take part in the election, and joins or quits the
 * election accordingly.
 *
 * <p>While the "delay joining" deadline has not yet passed, nothing is
 * changed; a recheck is scheduled for the remaining time instead.
 */
private void recheckElectability() {
  // Lock order must stay: elector first, then this ZKFC instance.
  synchronized (elector) {
    synchronized (this) {
      final boolean isHealthy = (State.SERVICE_HEALTHY == lastHealthState);
      final long nanosLeft = delayJoiningUntilNanotime - System.nanoTime();

      if (nanosLeft > 0) {
        // Still inside the delay window: do not touch the election.
        if (isHealthy) {
          final long millisLeft = TimeUnit.NANOSECONDS.toMillis(nanosLeft);
          LOG.info("Would have joined master election, but this node is " +
              "prohibited from doing so for " +
              millisLeft + " more ms");
        }
        scheduleRecheck(nanosLeft);
        return;
      }

      switch (lastHealthState) {
        case SERVICE_HEALTHY: {
          // Healthy service: join the election.
          elector.joinElection(targetToData(localTarget));
          if (quitElectionOnBadState) {
            quitElectionOnBadState = false;
          }
          break;
        }

        case INITIALIZING: {
          final String notice = "Ensuring that " + localTarget + " does not " +
              "participate in active master election";
          LOG.info(notice);
          // Quit with the flag set to false (contrast with the unhealthy
          // cases below, which pass true).
          elector.quitElection(false);
          serviceState = HAServiceState.INITIALIZING;
          break;
        }

        case SERVICE_UNHEALTHY:
        case SERVICE_NOT_RESPONDING: {
          final String notice = "Quitting master election for " + localTarget +
              " and marking that fencing is necessary";
          LOG.info(notice);
          // Quit with the flag set to true; per the log message this marks
          // that fencing is necessary.
          elector.quitElection(true);
          serviceState = HAServiceState.INITIALIZING;
          break;
        }

        case HEALTH_MONITOR_FAILED: {
          fatalError("Health monitor failed!");
          break;
        }

        default:
          throw new IllegalArgumentException("Unhandled state:"
              + lastHealthState);
      }
    }
  }
}