DataNode(下文简称DN)本身不多做介绍。它的启动流程相对简单:从main方法开始,一步一步跟读源码即可。本文的源码解析只展示关键方法,被省略的代码以“//...”标出。
顺便求志同道合者,致力于大数据存储与计算
有无交流群共同学习(Hadoop源码/性能调优/生产环境真实场景)
DN流程图
源码解析
定位到DataNode.java,DN也是一个main程序
/**
 * Command-line entry point of the DataNode process.
 * Exits with status 0 when DFSUtil.parseHelpArgument returns true;
 * otherwise hands the arguments to secureMain with no secure resources.
 *
 * @param args command-line arguments
 */
public static void main(String args[]) {
  final boolean helpHandled =
      DFSUtil.parseHelpArgument(args, DataNode.USAGE, System.out, true);
  if (helpHandled) {
    System.exit(0);
  }
  secureMain(args, null);
}
进入secureMain(args, null),创建DN
/**
 * Called from main: emits the startup/shutdown message, creates the DataNode
 * through createDataNode and, when one is returned, joins on it.
 *
 * NOTE(review): the try block below has no catch/finally clause in this
 * excerpt - presumably elided by the article; confirm against the full source.
 *
 * @param args command-line arguments, forwarded to createDataNode
 * @param resources secure resources, forwarded to createDataNode
 */
public static void secureMain(String args[], SecureResources resources) {
// Not read anywhere in the visible excerpt.
int errorCode = 0;
try {
StringUtils.startupShutdownMessage(DataNode.class, args, LOG);
// Create the DataNode (conf is passed as null here)
DataNode datanode = createDataNode(args, null, resources);
if (datanode != null) {
// Block here until join() returns
datanode.join();
}
}
}
进入createDataNode(args, null, resources),创建DN
/**
 * Builds a single DataNode via instantiateDataNode and, if an instance was
 * produced, starts its daemon with runDatanodeDaemon().
 *
 * @return the started DataNode, or null when instantiateDataNode returned null
 * @throws IOException declared on this method's signature
 */
@VisibleForTesting
@InterfaceAudience.Private
public static DataNode createDataNode(String args[], Configuration conf,
    SecureResources resources) throws IOException {
  // Step 1: instantiate the DataNode
  final DataNode instance = instantiateDataNode(args, conf, resources);
  if (instance == null) {
    return null;
  }
  // Step 2: start the DataNode daemon
  instance.runDatanodeDaemon();
  return instance;
}
进入instantiateDataNode(args, conf, resources),创建DN
/** Instantiate a single datanode object, along with its secure resources.
* This must be run by invoking {@link DataNode#runDatanodeDaemon()}
* subsequently.
*
* NOTE(review): dataLocations is not declared in this excerpt - presumably it
* is produced by the elided configuration code; confirm against the full source.
*/
public static DataNode instantiateDataNode(String args [], Configuration conf,
SecureResources resources) throws IOException {
// ... configuration-related code (elided in this excerpt)
// Create the instance
return makeInstance(dataLocations, conf, resources);
}
进入makeInstance(dataLocations, conf, resources),检查目录、创建DN
/**
 * Runs the storage-location check over dataDirs and constructs a DataNode
 * from the locations that check returns.
 *
 * NOTE(review): the declarations of locations and storageLocationChecker are
 * not shown in this excerpt - presumably elided; confirm against the full source.
 */
static DataNode makeInstance(Collection<StorageLocation> dataDirs,
Configuration conf, SecureResources resources) throws IOException {
// ... (elided in this excerpt)
// Directory check; returns the list of usable disk directories
locations = storageLocationChecker.check(conf, dataDirs);
// Construct the DataNode
return new DataNode(conf, locations, storageLocationChecker, resources);
}
进入new DataNode(conf, locations, storageLocationChecker, resources),配置、创建DN
/**
 * Constructor: in the visible excerpt the only shown step is the call to
 * startDataNode with the data directories and secure resources; the
 * surrounding configuration code is elided.
 */
DataNode(final Configuration conf,
final List<StorageLocation> dataDirs,
final StorageLocationChecker storageLocationChecker,
final SecureResources resources) throws IOException {
// ... configuration (elided in this excerpt)
// Start the DataNode
startDataNode(dataDirs, resources);
// ... (elided in this excerpt)
}
进入startDataNode(dataDirs, resources),初始化各种线程服务,存储DataStorage/读写相关DataXceiver/RPC相关IpcServer/块池管理blockPoolManager等多个服务
/**
 * Sets up the DataNode's services in sequence: DataStorage, JMX registration,
 * the DataXceiver server, the info server, the JVM pause monitor, the IPC
 * server, the erasure-coding and block-recovery workers, and finally the
 * BlockPoolManager, which is asked to refresh the NameNodes.
 *
 * NOTE(review): neither parameter is referenced in the visible excerpt -
 * presumably they are used by elided code; confirm against the full source.
 */
void startDataNode(List<StorageLocation> dataDirectories,
SecureResources resources
) throws IOException {
// Initialize DataStorage
// DataStorage: manages and organizes the on-disk storage directories, e.g. current, previous, detach, tmp
// In the DataNode data directory you can see folders such as current, tmp, rbw or finalized
// FsDatasetImpl: manages and organizes block files and metadata files
storage = new DataStorage();
// Register with JMX
registerMXBean();
// Initialize the DataXceiverServer (streaming communication); it is started in DataNode#runDatanodeDaemon()
initDataXceiver();
// Start the InfoServer
startInfoServer();
// Start the JvmPauseMonitor (monitors the state of the JVM; can be queried through JMX)
pauseMonitor = new JvmPauseMonitor();
pauseMonitor.init(getConf());
pauseMonitor.start();
// Initialize the IPC (RPC) server
initIpcServer();
// Erasure-coding worker
ecWorker = new ErasureCodingWorker(getConf(), this);
blockRecoveryWorker = new BlockRecoveryWorker(this);
// Create the block pool manager
blockPoolManager = new BlockPoolManager(this);
// Refresh the NameNodes
blockPoolManager.refreshNamenodes(getConf());
}
进入blockPoolManager.refreshNamenodes(getConf()),获取NN RPC地址,完成NameNodes的刷新
/**
 * Reads the NameNode service and lifeline RPC address maps from the
 * configuration, then calls doRefreshNamenodes while holding
 * refreshNamenodesLock.
 *
 * NOTE(review): the declarations of newAddressMap / newLifelineAddressMap and
 * the catch clause of the try block are not shown in this excerpt -
 * presumably elided; confirm against the full source.
 */
void refreshNamenodes(Configuration conf)
throws IOException {
// NameNode RPC addresses
try {
newAddressMap =
DFSUtil.getNNServiceRpcAddressesForCluster(conf);
newLifelineAddressMap =
DFSUtil.getNNLifelineRpcAddressesForCluster(conf);
}
// The refresh itself runs under refreshNamenodesLock
synchronized (refreshNamenodesLock) {
doRefreshNamenodes(newAddressMap, newLifelineAddressMap);
}
}
进入doRefreshNamenodes(newAddressMap, newLifelineAddressMap),完成NameNodes的刷新
/**
 * Outline only: the body is elided in this excerpt and replaced by comments
 * that list the steps the article attributes to this method.
 */
private void doRefreshNamenodes(
Map<String, Map<String, InetSocketAddress>> addrMap,
Map<String, Map<String, InetSocketAddress>> lifelineAddrMap)
throws IOException {
// Determine the set of NameNodes
// Remove NameNodes that no longer exist
// Iterate over the federation and start new NameNodes: start a BPOfferService, which in practice starts the BPServiceActor threads
// Stop the BPOfferService threads of the removed NameNodes
// Update the NameNode list
}
BPServiceActor线程启动,进入BPServiceActor的run方法中,主要是与NN握手和发送心跳服务
/**
 * Thread body of the BPServiceActor as shown in this excerpt: performs the
 * handshake with the NameNode, then enters offerService().
 *
 * NOTE(review): connectToNNAndHandshake() is declared to throw IOException,
 * but no handling is shown here - presumably elided; confirm against the
 * full source.
 */
public void run() {
// Handshake with the NameNode and register
connectToNNAndHandshake();
// The service provided by the BPServiceActor
offerService();
}
进入connectToNNAndHandshake(),依次获取NN代理、获取名称空间信息,并向NN注册
/**
 * Two-phase handshake with the NameNode at nnAddr: obtain a proxy, retrieve
 * the namespace info and hand it to the owning BPOfferService (bpos), rename
 * the actor thread, then register.
 *
 * @throws IOException declared on this method's signature
 */
private void connectToNNAndHandshake() throws IOException {
// get NN proxy
bpNamenode = dn.connectToNN(nnAddr);
// First phase of the handshake with NN - get the namespace
// info.
NamespaceInfo nsInfo = retrieveNamespaceInfo();
// Verify that this matches the other NN in this HA pair.
// This also initializes our block pool in the DN if we are
// the first NN connection for this BP.
// Verify / set the namespace info
bpos.verifyAndSetNamespaceInfo(this, nsInfo);
/* set thread name again to include NamespaceInfo when it's available. */
this.bpThread.setName(formatThreadName("heartbeating", nnAddr));
// Second phase of the handshake with the NN.
// Register
register(nsInfo);
}
进入verifyAndSetNamespaceInfo(this, nsInfo),设置名称空间,初始化块池
/**
 * Stores the namespace info obtained during the handshake and, when
 * setNamespaceInfo(nsInfo) returns null, initializes the block pool on the
 * DataNode. If that initialization fails, the namespace info is reset to
 * null so that another BPServiceActor can retry.
 *
 * The whole operation runs under the write lock: it is acquired before the
 * try block and released in the finally block.
 *
 * @param actor the calling actor (not referenced in the visible excerpt)
 * @param nsInfo namespace info retrieved from the NameNode
 * @throws IOException declared on this method's signature
 */
void verifyAndSetNamespaceInfo(BPServiceActor actor, NamespaceInfo nsInfo)
    throws IOException {
  // Acquire the lock that the finally block releases. Without this call the
  // excerpt would unlock a lock it never took.
  // NOTE(review): assumes writeLock() is the counterpart of writeUnlock() in
  // the enclosing class - confirm against the full source.
  writeLock();
  try {
    DataNodeFaultInjector.get().delayWhenOfferServiceHoldLock();
    if (setNamespaceInfo(nsInfo) == null) {
      boolean success = false;
      // Now that we know the namespace ID, etc, we can pass this to the DN.
      // The DN can now initialize its local storage if we are the
      // first BP to handshake, etc.
      try {
        // Per the article: initializes the DataStorage for this block pool
        // and the FsDatasetImpl object, and starts the DataBlockScanner
        // (block scanning) and DirectoryScanner (directory checking) threads.
        dn.initBlockPool(this);
        success = true;
      } finally {
        if (!success) {
          // The datanode failed to initialize the BP. We need to reset
          // the namespace info so that other BPService actors still have
          // a chance to set it, and re-initialize the datanode.
          setNamespaceInfo(null);
        }
      }
    }
  } finally {
    writeUnlock();
  }
}