一、shell调用梳理
启动hdfs的shell脚本是
${HADOOP_HDFS_HOME}/sbin/start-dfs.sh
其中在启动namenode、datanode、secondarynamenode、journalnode、zkfc等服务时都是调用了
${HADOOP_PREFIX}/sbin/hadoop-daemons.sh
其中会首先调用hadoop-config.sh,在这里加载了各种配置
然后通过 slaves.sh 在各个节点上调用 hadoop-daemon.sh,
最终调用了 ${HADOOP_PREFIX}/bin/hdfs
在hdfs脚本中,直接使用java命令调用了
org.apache.hadoop.hdfs.server.namenode.NameNode
org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode
org.apache.hadoop.hdfs.server.datanode.DataNode
等类
二、NameNode类分析
首先大致浏览了一下这个类,其中看到感兴趣的类就点进去大致看一眼,总体上看这个类的内容写得很明白
然后开始从入口看代码
// Entry point for all NameNode-related commands; dispatches through the
// single factory method createNameNode(...).
public static void main(String argv[]) throws Exception {
// If the arguments just ask for help, print the usage message and exit.
if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) {
System.exit(0);
}
try {
// Log the startup banner and register a shutdown hook (which logs the
// shutdown message).
StringUtils.startupShutdownMessage(NameNode.class, argv, LOG);
// All namenode startup commands go through this one unified method.
NameNode namenode = createNameNode(argv, null);
// A non-null namenode means an actual namenode service was started (as
// opposed to a one-shot command); join() blocks until the service stops.
if (namenode != null) {
namenode.join();
}
} catch (Throwable e) {
LOG.fatal("Failed to start namenode.", e);
terminate(1, e);
}
}
NameNode 构造方法(初始化代码)如下
// Constructor: resolves nameservice/namenode ids and the HA state, then
// delegates the real startup work to initialize(conf); on any failure it
// stops whatever was started before rethrowing.
protected NameNode(Configuration conf, NamenodeRole role)
throws IOException {
this.conf = conf;
this.role = role;
// Determine the address clients should use to reach this namenode.
setClientNamenodeAddress(conf);
String nsId = getNameServiceId(conf);
String namenodeId = HAUtil.getNameNodeId(conf, nsId);
this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
// Initial HA state is derived from the startup option.
state = createHAState(getStartupOption(conf));
this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
this.haContext = createHAContext();
try {
initializeGenericKeys(conf, nsId, namenodeId);
// This is where the namenode services are actually initialized.
initialize(conf);
// Enter the initial HA state under the HA write lock.
try {
haContext.writeLock();
state.prepareToEnterState(haContext);
state.enterState(haContext);
} finally {
haContext.writeUnlock();
}
} catch (IOException e) {
// Release partially-started resources before propagating the failure.
this.stop();
throw e;
} catch (HadoopIllegalArgumentException e) {
this.stop();
throw e;
}
this.started.set(true);
}
真正的初始化操作
// The real initialization: security login, metrics, the HTTP server, the
// namesystem loaded from disk, the RPC servers, and the common services.
protected void initialize(Configuration conf) throws IOException {
if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
if (intervals != null) {
conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
intervals);
}
}
// Configure UGI and log in as the namenode user.
UserGroupInformation.setConfiguration(conf);
loginAsNameNodeUser(conf);
// Register metrics that track assorted namenode statistics.
NameNode.initMetrics(conf, this.getRole());
StartupProgressMetrics.register(startupProgress);
// For an active-role namenode, start the HTTP server here (the namenode
// web UI; following the code shows the default port is 50070).
if (NamenodeRole.NAMENODE == role) {
startHttpServer(conf);
}
this.spanReceiverHost = SpanReceiverHost.getInstance(conf);
// Load the namesystem from disk; the namesystem in turn initializes the
// BlockManager, CacheManager, etc.
loadNamesystem(conf);
// Create the RPC servers — mainly serviceRpcServer and clientRpcServer,
// listening for datanode and client requests respectively.
rpcServer = createRpcServer(conf);
if (clientNamenodeAddress == null) {
// This is expected for MiniDFSCluster. Set it now using
// the RPC server's bind address.
clientNamenodeAddress =
NetUtils.getHostPortString(rpcServer.getRpcAddress());
LOG.info("Clients are to use " + clientNamenodeAddress + " to access"
+ " this namenode/service.");
}
if (NamenodeRole.NAMENODE == role) {
httpServer.setNameNodeAddress(getNameNodeAddress());
httpServer.setFSImage(getFSImage());
}
pauseMonitor = new JvmPauseMonitor(conf);
pauseMonitor.start();
metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);
// Start the RPC server and the other common services (including the
// services relevant to a standby namenode).
startCommonServices(conf);
}
启动其他的一些通用服务
// Starts the services shared by all namenode roles: namesystem services,
// the HTTP server for non-active roles, the RPC server, and plugins.
private void startCommonServices(Configuration conf) throws IOException {
// Start the namesystem's services, including disk capacity checks;
// also enters SafeMode to wait for datanodes to report their blocks.
namesystem.startCommonServices(conf, haContext);
registerNNSMXBean();
// For a non-active role (e.g. standby), start the HTTP server here.
if (NamenodeRole.NAMENODE != role) {
startHttpServer(conf);
httpServer.setNameNodeAddress(getNameNodeAddress());
httpServer.setFSImage(getFSImage());
}
// Start the RPC server.
rpcServer.start();
// Load RPC service plugins (user-defined extensions); note that a plugin
// failing to start is only logged — it does not abort namenode startup.
plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY,
ServicePlugin.class);
for (ServicePlugin p: plugins) {
try {
p.start(this);
} catch (Throwable t) {
LOG.warn("ServicePlugin " + p + " could not be started", t);
}
}
LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress());
if (rpcServer.getServiceRpcAddress() != null) {
LOG.info(getRole() + " service RPC up at: "
+ rpcServer.getServiceRpcAddress());
}
}
三、DataNode启动流程分析
DataNode类源码学习
这个类代码比较多,就不全部浏览了,先把开头部分的参数以及构造器大致浏览一下,然后看main方法
这一句比较有意思:
* DataNodes spend their lives in an endless loop of asking
* the NameNode for something to do. A NameNode cannot connect
* to a DataNode directly; a NameNode simply returns values from
* functions invoked by a DataNode.
main方法
// DataNode entry point.
public static void main(String args[]) {
// If the arguments just ask for help, print the usage message and exit.
if (DFSUtil.parseHelpArgument(args, DataNode.USAGE, System.out, true)) {
System.exit(0);
}
secureMain(args, null);
}
secureMain调用了createDataNode方法
// Instantiates a DataNode and, when instantiation succeeded, starts its
// internal daemon services.
public static DataNode createDataNode(String args[], Configuration conf,
SecureResources resources) throws IOException {
// Create the datanode (may be null, e.g. when argument parsing failed).
DataNode dn = instantiateDataNode(args, conf, resources);
if (dn != null) {
// Start the datanode's internal services.
dn.runDatanodeDaemon();
}
return dn;
}
instantiateDataNode方法
// Parses the arguments, performs the security login, and builds the
// DataNode instance; returns null when the arguments could not be parsed.
public static DataNode instantiateDataNode(String args [], Configuration conf,
SecureResources resources) throws IOException {
if (conf == null)
conf = new HdfsConfiguration();
if (args != null) {
// parse generic hadoop options
GenericOptionsParser hParser = new GenericOptionsParser(conf, args);
args = hParser.getRemainingArgs();
}
if (!parseArguments(args, conf)) {
printUsage(System.err);
return null;
}
// Resolve the directories where this datanode actually stores its data.
Collection<StorageLocation> dataLocations = getStorageLocations(conf);
UserGroupInformation.setConfiguration(conf);
SecurityUtil.login(conf, DFS_DATANODE_KEYTAB_FILE_KEY,
DFS_DATANODE_KERBEROS_PRINCIPAL_KEY);
// Create the instance.
return makeInstance(dataLocations, conf, resources);
}
makeInstance方法
// Disk-checks the configured data directories and constructs the DataNode
// from the locations that pass.
static DataNode makeInstance(Collection<StorageLocation> dataDirs,
Configuration conf, SecureResources resources) throws IOException {
LocalFileSystem localFS = FileSystem.getLocal(conf);
// Permission for the local data directories; the default is 700.
FsPermission permission = new FsPermission(
conf.get(DFS_DATANODE_DATA_DIR_PERMISSION_KEY,
DFS_DATANODE_DATA_DIR_PERMISSION_DEFAULT));
// Used to create local directories and verify their permissions.
DataNodeDiskChecker dataNodeDiskChecker =
new DataNodeDiskChecker(permission);
// Keep only the locations that pass the check (the check itself may
// create a missing directory).
List<StorageLocation> locations =
checkStorageLocations(dataDirs, localFS, dataNodeDiskChecker);
DefaultMetricsSystem.initialize("DataNode");
assert locations.size() > 0 : "number of data directories should be > 0";
// Construct the DataNode object.
return new DataNode(conf, locations, resources);
}
DataNode构造方法
// Constructor: reads the configuration knobs, decides whether short-circuit
// file-descriptor passing is usable, then calls startDataNode(...) which
// does the real startup work; on failure it shuts down and rethrows.
DataNode(final Configuration conf,
final List<StorageLocation> dataDirs,
final SecureResources resources) throws IOException {
super(conf);
this.lastDiskErrorCheck = 0;
this.maxNumberOfBlocksToLog = conf.getLong(DFS_MAX_NUM_BLOCKS_TO_LOG_KEY,
DFS_MAX_NUM_BLOCKS_TO_LOG_DEFAULT);
// Read the various configuration parameters.
this.usersWithLocalPathAccess = Arrays.asList(
conf.getTrimmedStrings(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY));
this.connectToDnViaHostname = conf.getBoolean(
DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME,
DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT);
this.getHdfsBlockLocationsEnabled = conf.getBoolean(
DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED,
DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED_DEFAULT);
this.supergroup = conf.get(DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY,
DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT);
// Whether permission checking is enabled.
this.isPermissionEnabled = conf.getBoolean(
DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY,
DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT);
confVersion = "core-" +
conf.get("hadoop.common.configuration.version", "UNSPECIFIED") +
",hdfs-" +
conf.get("hadoop.hdfs.configuration.version", "UNSPECIFIED");
// Determine whether we should try to pass file descriptors to clients.
if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT)) {
String reason = DomainSocket.getLoadingFailureReason();
if (reason != null) {
LOG.warn("File descriptor passing is disabled because " + reason);
this.fileDescriptorPassingDisabledReason = reason;
} else {
LOG.info("File descriptor passing is enabled.");
this.fileDescriptorPassingDisabledReason = null;
}
} else {
this.fileDescriptorPassingDisabledReason =
"File descriptor passing was not configured.";
LOG.debug(this.fileDescriptorPassingDisabledReason);
}
try {
// Resolve the hostname, preferring the dfs.datanode.hostname setting.
hostName = getHostName(conf);
LOG.info("Configured hostname is " + hostName);
// Start the datanode services.
startDataNode(conf, dataDirs, resources);
} catch (IOException ie) {
// Clean up anything partially started before propagating the failure.
shutdown();
throw ie;
}
}
startDataNode方法
// Performs the actual datanode startup: storage layer, data transfer
// (DataXceiver) services, web UI, pause monitor, IPC server, block pool
// management, and SASL data-transfer setup.
void startDataNode(Configuration conf,
List<StorageLocation> dataDirs,
SecureResources resources
) throws IOException {
// settings global for all BPs in the Data Node
this.secureResources = resources;
synchronized (this) {
this.dataDirs = dataDirs;
}
this.conf = conf;
this.dnConf = new DNConf(conf);
checkSecureConfig(dnConf, conf, resources);
// NOTE(review): presumably this hosts tracing span receivers (HTrace);
// confirm against the SpanReceiverHost documentation.
this.spanReceiverHost = SpanReceiverHost.getInstance(conf);
if (dnConf.maxLockedMemory > 0) {
if (!NativeIO.POSIX.getCacheManipulator().verifyCanMlock()) {
throw new RuntimeException(String.format(
"Cannot start datanode because the configured max locked memory" +
" size (%s) is greater than zero and native code is not available.",
DFS_DATANODE_MAX_LOCKED_MEMORY_KEY));
}
if (Path.WINDOWS) {
NativeIO.Windows.extendWorkingSetSize(dnConf.maxLockedMemory);
} else {
long ulimit = NativeIO.POSIX.getCacheManipulator().getMemlockLimit();
if (dnConf.maxLockedMemory > ulimit) {
throw new RuntimeException(String.format(
"Cannot start datanode because the configured max locked memory" +
" size (%s) of %d bytes is more than the datanode's available" +
" RLIMIT_MEMLOCK ulimit of %d bytes.",
DFS_DATANODE_MAX_LOCKED_MEMORY_KEY,
dnConf.maxLockedMemory,
ulimit));
}
}
}
LOG.info("Starting DataNode with maxLockedMemory = " +
dnConf.maxLockedMemory);
// Storage layer: defines the on-disk layout conventions, e.g. folders
// named "subdir..." and block files named "blk_...".
storage = new DataStorage();
// global DN settings
registerMXBean();
// Create the block data receiver/transfer (DataXceiver) service.
initDataXceiver(conf);
// Start the web UI; following the code shows the default port is 50075.
startInfoServer(conf);
// Start a JVM monitoring thread that counts JVM pauses (e.g. caused by
// garbage collection). Internally a thread sleeps for a fixed interval;
// if the sleep took noticeably longer than requested, the JVM is assumed
// to have paused.
pauseMonitor = new JvmPauseMonitor(conf);
pauseMonitor.start();
// Manages one BlockTokenSecretManager per block pool.
// BlockPoolTokenSecretManager is required to create ipc server.
this.blockPoolTokenSecretManager = new BlockPoolTokenSecretManager();
// Login is done by now. Set the DN user name.
dnUserName = UserGroupInformation.getCurrentUser().getShortUserName();
LOG.info("dnUserName = " + dnUserName);
LOG.info("supergroup = " + supergroup);
// Initialize the IPC server (used for RPC communication).
initIpcServer(conf);
// Metrics collection.
metrics = DataNodeMetrics.create(conf, getDisplayName());
metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);
// Manages the BPOfferService instances.
blockPoolManager = new BlockPoolManager(this);
// NOTE(review): this appears to register the datanode with the namenode
// and start polling for commands to execute — confirm by following
// BlockPoolManager / BPOfferService.
blockPoolManager.refreshNamenodes(conf);
// Create the ReadaheadPool from the DataNode context so we can
// exit without having to explicitly shutdown its thread pool.
readaheadPool = ReadaheadPool.getInstance();
saslClient = new SaslDataTransferClient(dnConf.conf,
dnConf.saslPropsResolver, dnConf.trustedChannelResolver);
saslServer = new SaslDataTransferServer(dnConf, blockPoolTokenSecretManager);
}
创建完DataNode后,调用runDatanodeDaemon方法
// Starts the daemon threads that were created by startDataNode(...).
public void runDatanodeDaemon() throws IOException {
// Start the block pool manager services (communication with namenodes).
blockPoolManager.startAll();
// Start the data transfer services.
// start dataXceiveServer
dataXceiverServer.start();
if (localDataXceiverServer != null) {
localDataXceiverServer.start();
}
// Start the IPC server.
ipcServer.start();
// Start the configured plugins.
startPlugins(conf);
}