Startup Flow
Version: hbase-2.2.5
Note: the scripts and source code shown in this analysis are abridged; only the parts relevant to the discussion are kept, and interested readers can consult the full sources themselves. The sections follow the call order at startup, and since scripts and source are quoted directly the material is fairly dry, so expect a longer read.
start-hbase.sh
The entry point for starting HBase. There are two modes, standalone and distributed; which one runs depends on the user's configuration, as explained in detail below.
# Resolve the current directory, i.e. {HBASE_HOME}/bin
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin">/dev/null; pwd`
# Source hbase-config.sh from the bin directory
. "$bin"/hbase-config.sh
# Check whether hbase-config.sh loaded successfully and exit if not; by convention, an exit status of 0 from the last command means no error
errCode=$?
if [ $errCode -ne 0 ]
then
exit $errCode
fi
# Users normally pass no argument here, so commandToRun defaults to start
if [ "$1" = "autostart" ]
then
commandToRun="--autostart-window-size ${AUTOSTART_WINDOW_SIZE} --autostart-window-retry-limit ${AUTOSTART_WINDOW_RETRY_LIMIT} autostart"
else
commandToRun="start"
fi
# Use HBaseConfTool from the HBase source to read the value of hbase.cluster.distributed from conf/hbase-site.xml, which says whether this is a distributed deployment; see Appendix 1 below
distMode=`$bin/hbase --config "$HBASE_CONF_DIR" org.apache.hadoop.hbase.util.HBaseConfTool hbase.cluster.distributed | head -n 1`
# When distMode is false, start the standalone version: HMaster, the HRegionServer and the embedded MiniZooKeeperCluster all run in the same JVM; see Appendix 2 below
if [ "$distMode" == 'false' ]
then
"$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" $commandToRun master
# When the value is true, start an HBase cluster (see Appendix 2 below): ZooKeeper, the HMaster and the HRegionServers are started in turn. ZooKeeper can run in one of two ways, managed by HBase or deployed independently, depending on the HBASE_MANAGES_ZK setting in hbase-env.sh; when it is true, HBase manages ZooKeeper.
else
"$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" $commandToRun zookeeper
"$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" $commandToRun master
"$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
--hosts "${HBASE_REGIONSERVERS}" $commandToRun regionserver
"$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
--hosts "${HBASE_BACKUP_MASTERS}" $commandToRun master-backup
fi
Appendix 1
The HBaseConfTool source and the related part of HBaseConfiguration, shown below, make it clear that the configuration files hbase-default.xml and hbase-site.xml are read, and that the value is looked up by the key passed in from the script.
public class HBaseConfTool {
public static void main(String args[]) {
if (args.length < 1) {
System.err.println("Usage: HBaseConfTool <CONFIGURATION_KEY>");
System.exit(1);
return;
}
Configuration conf = HBaseConfiguration.create();
System.out.println(conf.get(args[0]));
}
}
---
public class HBaseConfiguration extends Configuration {
  public static Configuration create() {
    Configuration conf = new Configuration();
    // Load resources through HBaseConfiguration's own classloader
    conf.setClassLoader(HBaseConfiguration.class.getClassLoader());
    return addHbaseResources(conf);
  }
public static Configuration addHbaseResources(Configuration conf) {
conf.addResource("hbase-default.xml");
conf.addResource("hbase-site.xml");
checkDefaultsVersion(conf);
return conf;
}
}
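Since addResource loads hbase-default.xml first and hbase-site.xml second, values set in hbase-site.xml override the shipped defaults. As a minimal sketch of reading the mode the same way the script does (the class name and printed labels are mine, for illustration only):
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class ClusterModeCheck {
  public static void main(String[] args) {
    // Loads hbase-default.xml, then hbase-site.xml on top of it
    Configuration conf = HBaseConfiguration.create();
    // hbase-default.xml ships "false"; an hbase-site.xml entry overrides it
    boolean distributed = conf.getBoolean("hbase.cluster.distributed", false);
    System.out.println(distributed ? "distributed" : "standalone");
  }
}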
Appendix 2
HMaster receives the arguments passed in by the script and hands them to the doMain method of ServerCommandLine for parsing; the actual startup then goes through HMasterCommandLine. Standalone and distributed deployments differ only in which branch the run method of HMasterCommandLine takes after inspecting the configuration.
public class HMaster extends HRegionServer implements MasterServices {
public static void main(String [] args) {
LOG.info("STARTING service " + HMaster.class.getSimpleName());
VersionInfo.logVersion();
new HMasterCommandLine(HMaster.class).doMain(args);
}
}
---
public abstract class ServerCommandLine extends Configured implements Tool {
public void doMain(String args[]) {
try {
// Load the HBase configuration files and hand off to ToolRunner
int ret = ToolRunner.run(HBaseConfiguration.create(), this, args);
if (ret != 0) {
System.exit(ret);
}
} catch (Exception e) {
LOG.error("Failed to run", e);
System.exit(-1);
}
}
}
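ServerCommandLine follows Hadoop's standard Tool pattern: Configured holds the Configuration that ToolRunner injects, and run(String[]) receives the leftover command-line arguments. For reference, Hadoop's org.apache.hadoop.util.Tool contract is simply:
public interface Tool extends Configurable {
  // Execute with the arguments left after generic option parsing
  int run(String[] args) throws Exception;
}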
---
public class ToolRunner {
public static int run(Configuration conf, Tool tool, String[] args) throws Exception {
// ...
// Delegate to the run method of HMasterCommandLine
return tool.run(toolArgs);
}
}
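The elided part of run is small; in Hadoop 2.x it essentially strips the generic Hadoop options and hands the Configuration to the Tool before delegating. A sketch of that body (paraphrased from Hadoop, not quoted verbatim):
public static int run(Configuration conf, Tool tool, String[] args) throws Exception {
  if (conf == null) {
    conf = new Configuration();
  }
  // Consume generic options such as -D key=value and -conf <file>
  GenericOptionsParser parser = new GenericOptionsParser(conf, args);
  // Give the Tool its Configuration so it can configure itself
  tool.setConf(conf);
  // Pass on only the arguments the generic parser did not consume
  String[] toolArgs = parser.getRemainingArgs();
  return tool.run(toolArgs);
}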
---
public class HMasterCommandLine extends ServerCommandLine {
public int run(String args[]) throws Exception {
// Add the default options
// ...
CommandLine cmd;
try {
// Parse the arguments; on failure this ultimately calls getUsage of HMasterCommandLine to print usage instructions. Here start ends up among the args held by cmd
cmd = new GnuParser().parse(opt, args);
} catch (ParseException e) {
LOG.error("Could not parse: ", e);
usage(null);
return 1;
}
// Apply the parsed options
// ...
// The arguments remaining after parsing; here that is start
@SuppressWarnings("unchecked")
List<String> remainingArgs = cmd.getArgList();
if (remainingArgs.size() != 1) {
usage(null);
return 1;
}
String command = remainingArgs.get(0);
// Dispatch to the method matching the received command
if ("start".equals(command)) {
return startMaster();
} else if ("stop".equals(command)) {
return stopMaster();
} else if ("clear".equals(command)) {
return (ZNodeClearer.clear(getConf()) ? 0 : 1);
} else {
usage("Invalid command: " + command);
return 1;
}
}
private int startMaster() {
// Fetch the configuration
Configuration conf = getConf();
// TraceUtil is a wrapper class that provides simplified access to htrace 4+. Apache HTrace is a distributed tracing framework open-sourced by Cloudera that supports systems such as HDFS and HBase, giving applications request tracing and performance analysis
TraceUtil.initTracer(conf);
try {
// Determine from the configuration whether this is standalone or distributed mode. Standalone means a LocalHBaseCluster instance, which starts the Master and a RegionServer in the same JVM
if (LocalHBaseCluster.isLocal(conf)) {
DefaultMetricsSystem.setMiniClusterMode(true);
// In standalone mode a MiniZooKeeperCluster is started as the ZooKeeper service; much of that class was extracted from ZooKeeper's test code
final MiniZooKeeperCluster zooKeeperCluster = new MiniZooKeeperCluster(conf);
// Use the value of hbase.zookeeper.property.dataDir from the configuration as the ZooKeeper data directory
File zkDataPath = new File(conf.get(HConstants.ZOOKEEPER_DATA_DIR));
// find out the default client port
int zkClientPort = 0;
// Parse the configured ZooKeeper port out of the hbase.zookeeper.quorum setting
String zkserver = conf.get(HConstants.ZOOKEEPER_QUORUM);
if (zkserver != null) {
String[] zkservers = zkserver.split(",");
// Standalone mode supports only a single ZooKeeper server
if (zkservers.length > 1) {
// In local mode deployment, we have the master + a region server and zookeeper server
// started in the same process. Therefore, we only support one zookeeper server.
String errorMsg = "Could not start ZK with " + zkservers.length +
" ZK servers in local mode deployment. Aborting as clients (e.g. shell) will not "
+ "be able to find this ZK quorum.";
System.err.println(errorMsg);
throw new IOException(errorMsg);
}
String[] parts = zkservers[0].split(":");
if (parts.length == 2) {
// the second part is the client port
zkClientPort = Integer.parseInt(parts[1]);
}
}
// If the client port could not be found in the server quorum conf, try another conf
if (zkClientPort == 0) {
zkClientPort = conf.getInt(HConstants.ZOOKEEPER_CLIENT_PORT, 0);
// The client port has to be set by now; if not, throw exception.
if (zkClientPort == 0) {
throw new IOException("No config value for " + HConstants.ZOOKEEPER_CLIENT_PORT);
}
}
zooKeeperCluster.setDefaultClientPort(zkClientPort);
// set the ZK tick time if specified
int zkTickTime = conf.getInt(HConstants.ZOOKEEPER_TICK_TIME, 0);
if (zkTickTime > 0) {
zooKeeperCluster.setTickTime(zkTickTime);
}
// If security is enabled, the ZooKeeper keytab file, principal and so on must be configured
// login the zookeeper server principal (if using security)
ZKUtil.loginServer(conf, HConstants.ZK_SERVER_KEYTAB_FILE,
HConstants.ZK_SERVER_KERBEROS_PRINCIPAL, null);
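// Shorten the ZK session timeout for the embedded cluster via the ".localHBaseCluster"-suffixed key, defaulting to 10 seconds (10*1000 ms)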
int localZKClusterSessionTimeout =
conf.getInt(HConstants.ZK_SESSION_TIMEOUT + ".localHBaseCluster", 10*1000);
conf.setInt(HConstants.ZK_SESSION_TIMEOUT, localZKClusterSessionTimeout);
LOG.info("Starting a zookeeper cluster");
// Start the ZooKeeper service
int clientPort = zooKeeperCluster.startup(zkDataPath);
// If ZooKeeper came up on a port other than the one requested, report it and abort
if (clientPort != zkClientPort) {
String errorMsg = "Could not start ZK at requested port of " +
zkClientPort + ". ZK was started at port: " + clientPort +
". Aborting as clients (e.g. shell) will not be able to find " +
"this ZK quorum.";
System.err.println(errorMsg);
throw new IOException(errorMsg);
}
// On success, record the actual ZooKeeper client port in the HBase configuration
conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(clientPort));
// Need to have the zk cluster shutdown when master is shutdown.
// Run a subclass that does the zk cluster shutdown on its way out.
int mastersCount = conf.getInt("hbase.masters", 1);
int regionServersCount = conf.getInt("hbase.regionservers", 1);
// Set start timeout to 5 minutes for cmd line start operations
conf.setIfUnset("hbase.master.start.timeout.localHBaseCluster", "300000");
LOG.info("Starting up instance of localHBaseCluster; master=" + mastersCount +
", regionserversCount=" + regionServersCount);
// LocalHMaster extends HMaster; it starts alongside the HRegionServer, and when it stops it also stops the ZooKeeper service
LocalHBaseCluster cluster = new LocalHBaseCluster(conf, mastersCount, regionServersCount,
LocalHMaster.class, HRegionServer.class);
// Hand the running zooKeeperCluster to the LocalHMaster so the ZooKeeper service is stopped when the LocalHMaster stops
((LocalHMaster)cluster.getMaster(0)).setZKCluster(zooKeeperCluster);
// Start everything via the startup method of LocalHBaseCluster
cluster.startup();
waitOnMasterThreads(cluster);
} else {
// Start in distributed mode
// Log information about the running JVM process, including environment variables; this can be disabled by setting hbase.envvars.logging.disabled to true
logProcessInfo(getConf());
// Instantiate the HMaster via reflection on its constructor
HMaster master = HMaster.constructMaster(masterClass, conf);
// If a shutdown has already been requested, do not bring the HMaster up
if (master.isStopped()) {
LOG.info("Won't bring the Master up as a shutdown is requested");
return 1;
}
// Start the HMaster; the processing happens in the run method of HMaster
master.start();
// Block until the HMaster thread exits
master.join();
// If the master aborted, raise an error
if(master.isAborted())
throw new RuntimeException("HMaster Aborted");
}
} catch (Throwable t) {
LOG.error("Master exiting", t);
return 1;
}
return 0;
}
// Called by startMaster in HMasterCommandLine when HBase starts in standalone mode
private void waitOnMasterThreads(LocalHBaseCluster cluster) throws InterruptedException{
List<JVMClusterUtil.MasterThread> masters = cluster.getMasters();
List<JVMClusterUtil.RegionServerThread> regionservers = cluster.getRegionServers();
if (masters != null) {
for (JVMClusterUtil.MasterThread t : masters) {
// Wait on the MasterThreads before dealing with the RegionServerThreads; if a master aborted, close all RegionServer threads and report the error
t.join();
if(t.getMaster().isAborted()) {
closeAllRegionServerThreads(regionservers);
          throw new RuntimeException("HMaster Aborted");
        }
      }
    }
  }
  // ...
}
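The LocalHMaster used above is a thin subclass whose purpose is exactly what setZKCluster suggests: remember the embedded MiniZooKeeperCluster and shut it down once the master's run method returns. A simplified sketch of its shape (abridged from the same HMasterCommandLine file):
public static class LocalHMaster extends HMaster {
  private MiniZooKeeperCluster zkcluster = null;

  public LocalHMaster(Configuration conf)
      throws IOException, KeeperException, InterruptedException {
    super(conf);
  }

  @Override
  public void run() {
    super.run();
    // Once the master has stopped, take the embedded ZooKeeper down with it
    if (this.zkcluster != null) {
      try {
        this.zkcluster.shutdown();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }

  void setZKCluster(final MiniZooKeeperCluster zkcluster) {
    this.zkcluster = zkcluster;
  }
}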