009-hadoop二次开发-DataNode启动流程
datanode启动流程相较于namenode启动流程更复杂一些。
/**
 * Entry point of the DataNode process. The startup path has to accomplish
 * three things:
 *   1. set up on-disk block storage,
 *   2. talk to the NameNode over IPC (heartbeat/registration),
 *   3. open socket-based streaming channels for clients and other DataNodes.
 */
public static void main(String args[]) {
  // When a help argument was passed, usage has already been printed — exit.
  final boolean helpRequested =
      DFSUtil.parseHelpArgument(args, DataNode.USAGE, System.out, true);
  if (helpRequested) {
    System.exit(0);
  }
  secureMain(args, null);
}
在源码文件DataNode.java中通过main函数来启动:首先做参数的合法性校验,若为帮助参数则打印用法并退出,否则进入secureMain方法。
/**
 * This method does three things:
 * 1. print the DataNode startup banner to the log
 * 2. create (and start) the DataNode
 * 3. block the calling thread until the DataNode shuts down
 */
public static void secureMain(String args[], SecureResources resources) {
int errorCode = 0;
try {
// Log basic startup info (hostname, args, version, JVM environment, etc.)
// and register a hook that logs the matching shutdown message.
StringUtils.startupShutdownMessage(DataNode.class, args, LOG);
// Instantiate and start the DataNode; null means the args were unusable.
DataNode datanode = createDataNode(args, null, resources);
if (datanode != null) {
// Block here until the DataNode finishes running.
datanode.join();
} else {
// Signal startup failure through the process exit code.
errorCode = 1;
}
} catch (Throwable e) {
LOG.fatal("Exception in secureMain", e);
terminate(1, e);
} finally {
// We need to terminate the process here because either shutdown was called
// or some disk related conditions like volumes tolerated or volumes required
// condition was not met. Also, In secure mode, control will go to Jsvc
// and Datanode process hangs if it does not exit.
LOG.warn("Exiting Datanode");
terminate(errorCode);
}
}
在secureMain方法中首先打印启动日志信息(主机名、参数、版本、JDK环境等)。
然后调用createDataNode初始化DataNode;若返回的datanode不为null,则调用join()阻塞等待其运行结束,否则把错误码置为1,最终在finally中以该错误码退出进程。
接下来进入createDataNode方法。
/**
 * Builds a single DataNode instance and, when construction succeeds,
 * launches its background daemon threads; the caller is expected to
 * join() on the returned node until it finishes.
 *
 * @param args command-line arguments
 * @param conf configuration to use, or null for the default
 * @param resources secure resources needed under Kerberos
 * @return the running DataNode, or null when the arguments were unusable
 * @throws IOException if the node cannot be set up
 */
@VisibleForTesting
@InterfaceAudience.Private
public static DataNode createDataNode(String args[], Configuration conf,
    SecureResources resources) throws IOException {
  DataNode node = instantiateDataNode(args, conf, resources);
  if (node == null) {
    return null;
  }
  // Register with the NameNode and spin up the daemon threads.
  node.runDatanodeDaemon();
  return node;
}
在runDatanodeDaemon方法中做了什么?
它依次启动了数据块池管理服务(blockPoolManager)、数据接收服务(dataXceiverServer)、本地数据接收服务(localDataXceiverServer,若存在)以及IPC服务,即由该方法拉起DataNode的各个后台线程。
```java
/** Start a single datanode daemon and wait for it to finish.
 * If this thread is specifically interrupted, it will stop waiting.
 */
public void runDatanodeDaemon() throws IOException {
// Start the per-namespace block pool services (registration/heartbeat).
// NOTE(review): original comment claims this is reached during
// DataNode.instantiateDataNode() — confirm against the call chain.
blockPoolManager.startAll();
// start dataXceiveServer
dataXceiverServer.start();
// The local (short-circuit) xceiver server is optional — start it if present.
if (localDataXceiverServer != null) {
localDataXceiverServer.start();
}
// Start the IPC server and any configured plugins last.
ipcServer.start();
startPlugins(conf);
}
```
这些线程创建的前提是什么?
需要实例化datanode,
/** Instantiate a single datanode object, along with its secure resources.
 * This must be run by invoking{@link DataNode#runDatanodeDaemon()}
 * subsequently.
 * This method does two things:
 * 1. resolve the data storage paths (dfs.datanode.data.dir) into dataLocations
 * 2. instantiate the DataNode from (storage paths, configuration, SecureResources)
 */
public static DataNode instantiateDataNode(String args [], Configuration conf,
SecureResources resources) throws IOException {
// ---- Prepare the configuration and parse the arguments ----
if (conf == null)
conf = new HdfsConfiguration();
if (args != null) {
// parse generic hadoop options
GenericOptionsParser hParser = new GenericOptionsParser(conf, args);
args = hParser.getRemainingArgs();
}
// Load datanode-specific argument values into conf; on bad args print
// usage and return null (caller treats null as startup failure).
if (!parseArguments(args, conf)) {
printUsage(System.err);
return null;
}
// ---- Resolve the HDFS data directories from dfs.datanode.data.dir,
// e.g. /dfs/dn1, /dfs/dn2, /dfs/dn3 ----
Collection<StorageLocation> dataLocations = getStorageLocations(conf);
// ---- Security: perform Kerberos login from the configured keytab ----
UserGroupInformation.setConfiguration(conf);
SecurityUtil.login(conf, DFS_DATANODE_KEYTAB_FILE_KEY,
DFS_DATANODE_KERBEROS_PRINCIPAL_KEY);
// Core: validate the data dirs and construct the DataNode.
return makeInstance(dataLocations, conf, resources);
}
dataLocations是什么?
/**
 * Reads dfs.datanode.data.dir from the configuration and turns each
 * trimmed entry into a StorageLocation. Entries that fail to parse are
 * logged and skipped instead of aborting startup.
 *
 * @param conf configuration holding DFS_DATANODE_DATA_DIR_KEY
 * @return the parseable storage locations (possibly empty)
 */
public static List<StorageLocation> getStorageLocations(Configuration conf) {
  // Raw directory strings, e.g. "/dfs/dn1, /dfs/dn2".
  Collection<String> rawLocations =
      conf.getTrimmedStringCollection(DFS_DATANODE_DATA_DIR_KEY);
  List<StorageLocation> parsed =
      new ArrayList<StorageLocation>(rawLocations.size());
  for (String locationString : rawLocations) {
    try {
      // Parse the storage URI; only successfully parsed entries are kept.
      parsed.add(StorageLocation.parse(locationString));
    } catch (IOException ioe) {
      LOG.error("Failed to initialize storage directory " + locationString
          + ". Exception details: " + ioe);
      // Ignore the exception.
    } catch (SecurityException se) {
      LOG.error("Failed to initialize storage directory " + locationString
          + ". Exception details: " + se);
      // Ignore the exception.
    }
  }
  return parsed;
}
接下来如何进行makeInstance
/**
 * Make an instance of DataNode after ensuring that at least one of the
 * given data directories (and their parent directories, if necessary)
 * can be created.
 * @param dataDirs List of directories, where the new DataNode instance should
 * keep its files.
 * @param conf Configuration instance to use.
 * @param resources Secure resources needed to run under Kerberos
 * @return DataNode instance for given list of data dirs and conf, or null if
 * no directory from this directory list can be created.
 * @throws IOException
 * This method does two things:
 * 1. validate the disks and obtain the list of usable directories
 * 2. construct the DataNode object
 */
static DataNode makeInstance(Collection<StorageLocation> dataDirs,
Configuration conf, SecureResources resources) throws IOException {
// Local filesystem handle used by the directory checks below.
LocalFileSystem localFS = FileSystem.getLocal(conf);
// Expected permission of the data directories (dfs.datanode.data.dir.perm).
FsPermission permission = new FsPermission(
conf.get(DFS_DATANODE_DATA_DIR_PERMISSION_KEY,
DFS_DATANODE_DATA_DIR_PERMISSION_DEFAULT));
// Disk checker that enforces the expected permission.
DataNodeDiskChecker dataNodeDiskChecker =
new DataNodeDiskChecker(permission);
/**
 * Run the disk check over every configured dir; returns only the usable
 * ones and throws when none survive.
 * */
List<StorageLocation> locations =
checkStorageLocations(dataDirs, localFS, dataNodeDiskChecker);
DefaultMetricsSystem.initialize("DataNode");
// checkStorageLocations already throws when empty; this assert documents it.
assert locations.size() > 0 : "number of data directories should be > 0";
return new DataNode(conf, locations, resources);
}
checkStorageLocations怎么进行校验的?
/** Uses the disk checker to validate each data directory; returns the list
 * of usable directories and throws IOException when none are usable. */
static List<StorageLocation> checkStorageLocations(
Collection<StorageLocation> dataDirs,
LocalFileSystem localFS, DataNodeDiskChecker dataNodeDiskChecker)
throws IOException {
// Collects the directories that pass the disk check.
ArrayList<StorageLocation> locations = new ArrayList<StorageLocation>();
// Accumulates the paths of directories that failed the check.
StringBuilder invalidDirs = new StringBuilder();
// Iterate over the data dirs — note this is strictly sequential.
for (StorageLocation location : dataDirs) {
final URI uri = location.getUri();
try {
// Check read/write/execute access on the directory; a missing directory
// is created and given the expected (default 700) permission.
dataNodeDiskChecker.checkDir(localFS, new Path(uri));
// No exception means the directory is usable — add it to the list.
locations.add(location);
} catch (IOException ioe) {
// An IOException marks this directory unusable — record it in invalidDirs.
LOG.warn("Invalid " + DFS_DATANODE_DATA_DIR_KEY + " "
+ location.getFile() + " : ", ioe);
invalidDirs.append("\"").append(uri.getPath()).append("\" ");
}
}
// Zero usable directories means every configured dir failed — refuse to start.
if (locations.size() == 0) {
throw new IOException("All directories in "
+ DFS_DATANODE_DATA_DIR_KEY + " are invalid: "
+ invalidDirs);
}
// Return the usable data directories.
return locations;
}
这里还没有说怎么去校验,只是声明了可用目录列表和不可用目录列表。
继续查看dataNodeDiskChecker.checkDir(localFS, new Path(uri));怎么进行校验的
/** Validates the directory at {@code path}, delegating to DiskChecker with
 * the expected permission captured by this checker instance. */
public void checkDir(LocalFileSystem localFS, Path path)
throws DiskErrorException, IOException {
DiskChecker.checkDir(localFS, path, expectedPermission);
}
/**
 * Create the local directory if necessary, check permissions and also ensure
 * it can be read from and written into.
 *
 * @param localFS local filesystem
 * @param dir directory
 * @param expected permission
 * @throws DiskErrorException
 * @throws IOException
 */
public static void checkDir(LocalFileSystem localFS, Path dir,
FsPermission expected)
throws DiskErrorException, IOException {
// Create the directory when absent and apply the expected permission;
// also corrects the permission of an existing directory that differs.
mkdirsWithExistsAndPermissionCheck(localFS, dir, expected);
/** Verify the current process has read/write/execute access. */
checkDirAccess(localFS.pathToFile(dir));
}
/**
 * Create the directory or check permissions if it already exists.
 *
 * The semantics of mkdirsWithExistsAndPermissionCheck method is different
 * from the mkdirs method provided in the Sun's java.io.File class in the
 * following way:
 * While creating the non-existent parent directories, this method checks for
 * the existence of those directories if the mkdir fails at any point (since
 * that directory might have just been created by some other process).
 * If both mkdir() and the exists() check fails for any seemingly
 * non-existent directory, then we signal an error; Sun's mkdir would signal
 * an error (return false) if a directory it is attempting to create already
 * exists or the mkdir fails.
 *
 * @param localFS local filesystem
 * @param dir directory to be created or checked
 * @param expected expected permission
 * @throws IOException
 */
public static void mkdirsWithExistsAndPermissionCheck(
    LocalFileSystem localFS, Path dir, FsPermission expected)
    throws IOException {
  File directory = localFS.pathToFile(dir);
  boolean created = false;
  // Create the directory (and any missing parents) when it does not exist.
  if (!directory.exists()) {
    created = mkdirsWithExistsCheck(directory);
  }
  // Apply the expected permission when the directory was just created, OR
  // when an existing directory carries a different permission than expected.
  // (The original comment only mentioned the just-created case.)
  if (created || !localFS.getFileStatus(dir).getPermission().equals(expected)) {
    localFS.setPermission(dir, expected);
  }
}
如果路径不存在,则创建目录:
/**
 * Creates {@code dir} together with any missing parent directories.
 *
 * Unlike java.io.File#mkdirs, a failed mkdir() at any level is followed by
 * an exists() check, so a directory concurrently created by another process
 * still counts as success. Only when both mkdir() and exists() fail for a
 * seemingly absent directory is failure reported.
 *
 * @param dir directory to create
 * @return true on success, false on failure
 */
public static boolean mkdirsWithExistsCheck(File dir) {
  // Fast path: a single mkdir succeeds, or the directory already exists.
  if (dir.mkdir() || dir.exists()) {
    return true;
  }
  // Canonicalize before walking up the tree; failure to resolve is failure.
  File canonDir;
  try {
    canonDir = dir.getCanonicalFile();
  } catch (IOException e) {
    return false;
  }
  String parent = canonDir.getParent();
  if (parent == null) {
    // A filesystem root that could not be created or found — give up.
    return false;
  }
  // Recursively ensure the parent chain exists, then retry this level.
  return mkdirsWithExistsCheck(new File(parent))
      && (canonDir.mkdir() || canonDir.exists());
}
通过递归的方式逐级创建目录。
通过checkDirAccess检查目录的读、写、执行权限
/**
 * Checks that the given file is a directory and that the current running
 * process can read, write, and execute it.
 *
 * @param dir File to check
 * @throws DiskErrorException if dir is not a directory, not readable, not
 * writable, or not executable
 */
private static void checkDirAccess(File dir) throws DiskErrorException {
if (!dir.isDirectory()) {
throw new DiskErrorException("Not a directory: "
+ dir.toString());
}
// It is a directory — now verify read/write/execute access.
checkAccessByFileMethods(dir);
}
/**
 * Checks that the current running process can read, write, and execute the
 * given directory by using methods of the File object.
 *
 * @param dir File to check
 * @throws DiskErrorException if dir is not readable, not writable, or not
 * executable
 */
private static void checkAccessByFileMethods(File dir)
throws DiskErrorException {
// Read access
if (!FileUtil.canRead(dir)) {
throw new DiskErrorException("Directory is not readable: "
+ dir.toString());
}
// Write access
if (!FileUtil.canWrite(dir)) {
throw new DiskErrorException("Directory is not writable: "
+ dir.toString());
}
// Execute access
if (!FileUtil.canExecute(dir)) {
throw new DiskErrorException("Directory is not executable: "
+ dir.toString());
}
}
这个过程中子方法每层都在抛异常,然后在checkStorageLocations方法中catch住,并累加到不可用目录列表。
当makeInstance拿到可用列表,可以构造datanode了, return new DataNode(conf, locations, resources);