上一篇分析了启动过程中,载入FSImage之前的一系列校验,这次开始看下FSImage真正的载入部分,在这里可以看到对该文件的读操作,通过这些操作可以知道该文件的结构,以及内存中的结构,个人觉得HDFS出于安全、恢复方面的考虑,在启动过程中做了相当多的校验,但就单纯读取该文件来说,流程还是比较好理解的,如果熟悉JAVA的文件读写的话,这个函数瞅一眼就知道个大概了。有一点需要注意,比如我们在集群中有这样的目录结构:
hdfs://192.168.0.1:9000/a.txt
hdfs://192.168.0.1:9000/test/b.txt
文件总共两个,但统计文件数的时候为4个,分别是根目录、test目录、a.txt、b.txt
FSImage.java
/**
 * Loads the filesystem image (fsimage) from {@code curFile} into memory,
 * rebuilding the in-memory namespace tree rooted at {@code fsDir.rootDir}.
 *
 * The on-disk layout read here is: image layout version, namespace id,
 * file count, generation stamp, then one record per inode (path,
 * replication, times, block list or directory quotas, permissions),
 * followed by datanode info, files under construction and secret
 * manager state. Many fields are conditional on the layout version.
 *
 * @param curFile the fsimage file to read; must not be null
 * @return true if the image uses an older layout version and should be
 *         re-saved in the current format
 * @throws IOException if the image cannot be read
 */
boolean loadFSImage(File curFile) throws IOException {
  assert this.getLayoutVersion() < 0 : "Negative layout version is expected.";
  assert curFile != null : "curFile is null";

  FSNamesystem fsNamesys = FSNamesystem.getFSNamesystem();
  FSDirectory fsDir = fsNamesys.dir;

  //
  // Load in bits
  //
  boolean needToSave = true;
  // Open the image file and wrap it in a buffered data stream.
  DataInputStream in = new DataInputStream(new BufferedInputStream(
      new FileInputStream(curFile)));
  try {
    /*
     * Note: Remove any checks for version earlier than
     * Storage.LAST_UPGRADABLE_LAYOUT_VERSION since we should never get
     * to here with older images.
     */
    /*
     * TODO we need to change format of the image file
     * it should not contain version and namespace fields
     */
    // Read image version: first appeared in version -1.
    // It is stored first because most of the format decisions below
    // depend on it.
    int imgVersion = in.readInt();
    // Read namespaceID: first appeared in version -2.
    this.namespaceID = in.readInt();

    // Read number of files: a long since layout -16, an int before that.
    long numFiles;
    if (imgVersion <= -16) {
      numFiles = in.readLong();
    } else {
      numFiles = in.readInt();
    }
    this.layoutVersion = imgVersion;

    // Read in the last generation stamp (present since layout -12).
    if (imgVersion <= -12) {
      long genstamp = in.readLong();
      fsNamesys.setGenerationStamp(genstamp);
    }

    // An image in an older layout must be re-saved in the current one.
    needToSave = (imgVersion != FSConstants.LAYOUT_VERSION);

    // Read file info.
    short replication = FSNamesystem.getFSNamesystem().getDefaultReplication();

    LOG.info("Number of files = " + numFiles);

    String path;
    String parentPath = "";
    INodeDirectory parentINode = fsDir.rootDir;
    for (long i = 0; i < numFiles; i++) {
      long modificationTime = 0;
      long atime = 0;
      long blockSize = 0;
      // Read the full path of this inode as a string, e.g. "/a.txt".
      path = readString(in);
      replication = in.readShort();
      // The replication factor is clamped into
      // [minReplication, maxReplication]; out-of-range values are
      // silently adjusted rather than rejected.
      replication = FSEditLog.adjustReplication(replication);
      modificationTime = in.readLong();
      if (imgVersion <= -17) {
        atime = in.readLong();
      }
      if (imgVersion <= -8) {
        blockSize = in.readLong();
      }
      // Number of blocks in this file.
      int numBlocks = in.readInt();
      Block blocks[] = null;

      // For older versions, a block list of size 0
      // indicates a directory.
      if ((-9 <= imgVersion && numBlocks > 0) ||
          (imgVersion < -9 && numBlocks >= 0)) {
        blocks = new Block[numBlocks];
        for (int j = 0; j < numBlocks; j++) {
          blocks[j] = new Block();
          if (-14 < imgVersion) {
            blocks[j].set(in.readLong(), in.readLong(),
                          Block.GRANDFATHER_GENERATION_STAMP);
          } else {
            // Read block id, byte count and generation stamp. The block
            // id identifies the physical block file, but the image does
            // NOT record which datanodes hold the replicas: locations
            // change over time (block loss, rebalancing after adding
            // nodes), so they are reported by the datanodes at runtime
            // and kept only in memory. This keeps the namenode efficient
            // and avoids random writes to the fsimage.
            blocks[j].readFields(in);
          }
        }
      }

      // Older versions of HDFS do not store the block size in the inode.
      // If the file has more than one block, use the size of the
      // first block as the blocksize. Otherwise use the default block
      // size. This branch exists purely for backward compatibility.
      if (-8 <= imgVersion && blockSize == 0) {
        if (numBlocks > 1) {
          blockSize = blocks[0].getNumBytes();
        } else {
          long first = ((numBlocks == 1) ? blocks[0].getNumBytes() : 0);
          blockSize = Math.max(fsNamesys.getDefaultBlockSize(), first);
        }
      }

      // If the inode is a directory (blocks == null), read its quotas.
      long nsQuota = -1L;
      if (imgVersion <= -16 && blocks == null) {
        nsQuota = in.readLong();
      }
      long dsQuota = -1L;
      if (imgVersion <= -18 && blocks == null) {
        dsQuota = in.readLong();
      }

      PermissionStatus permissions = fsNamesys.getUpgradePermission();
      if (imgVersion <= -11) {
        // Read POSIX-style permission info: user name, group name and
        // rwx bits for owner/group/other — familiar to anyone who knows
        // Linux filesystems.
        permissions = PermissionStatus.read(in);
      }

      // The root directory gets special treatment: update its
      // attributes in place and move on to the next record.
      if (path.length() == 0) { // it is the root
        // update the root's attributes
        if (nsQuota != -1 || dsQuota != -1) {
          fsDir.rootDir.setQuota(nsQuota, dsQuota);
        }
        fsDir.rootDir.setModificationTime(modificationTime);
        fsDir.rootDir.setPermissionStatus(permissions);
        continue;
      }

      // Check if the new inode belongs to the same parent as the
      // previous one; if not, drop the cached parent so addToParent
      // resolves it again.
      if (!isParent(path, parentPath)) {
        parentINode = null;
        parentPath = getParent(path);
      }

      // Add the newly built inode under its parent and cache the
      // returned parent inode for the next iteration.
      parentINode = fsDir.addToParent(path, parentINode, permissions,
                                      blocks, replication, modificationTime,
                                      atime, nsQuota, dsQuota, blockSize);
    }

    // Load datanode info. In this layout this is effectively a no-op:
    // the fsimage does not store datanode information.
    this.loadDatanodes(imgVersion, in);

    // Load files under construction.
    this.loadFilesUnderConstruction(imgVersion, in, fsNamesys);

    // Load the secret manager state from the tail of the image.
    this.loadSecretManagerState(imgVersion, in, fsNamesys);
  } finally {
    in.close();
  }

  return needToSave;
}