Default value: reportCompileThreadPool -> 1 thread (DFS_DATANODE_DIRECTORYSCAN_THREADS_DEFAULT)
DirectoryScanner(DataNode datanode, FsDatasetSpi<?> dataset, Configuration conf) {
this.datanode = datanode;
this.dataset = dataset;
int interval = conf.getInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY,
DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_DEFAULT);
scanPeriodMsecs = interval * 1000L; //msec
int threads =
conf.getInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_KEY,
DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_DEFAULT);
reportCompileThreadPool = Executors.newFixedThreadPool(threads,
new Daemon.DaemonFactory());
masterThread = new ScheduledThreadPoolExecutor(1,
new Daemon.DaemonFactory());
}
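For reference, the two settings read by the constructor are dfs.datanode.directoryscan.interval (default 21600 seconds, i.e. six hours between scans) and dfs.datanode.directoryscan.threads (default 1 report-compiler thread). A minimal sketch of overriding them programmatically with HdfsConfiguration and the DFSConfigKeys constants used above (in practice they are normally set in hdfs-site.xml):
Configuration conf = new HdfsConfiguration();
// scan every hour instead of the six-hour default
conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 3600);
// compile per-volume reports with two threads instead of one
conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_KEY, 2);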
DirectoryScanner.start starts the masterThread: it schedules this scanner at a fixed rate, delayed by a random offset so the first scan falls somewhere within the first period.
void start() {
shouldRun = true;
long offset = DFSUtil.getRandom().nextInt((int) (scanPeriodMsecs/1000L)) * 1000L; //msec
long firstScanTime = Time.now() + offset;
LOG.info("Periodic Directory Tree Verification scan starting at "
+ firstScanTime + " with interval " + scanPeriodMsecs);
masterThread.scheduleAtFixedRate(this, offset, scanPeriodMsecs,
TimeUnit.MILLISECONDS);
}
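As a worked example of the scheduling (a sketch, with java.util.Random and System.currentTimeMillis standing in for Hadoop's DFSUtil.getRandom() and Time.now()): with the default six-hour period the first scan is delayed by a random whole number of seconds in [0, 21600), so DataNodes that start at the same time do not all hit their disks at once.
long scanPeriodMsecs = 21600 * 1000L;                        // default interval: six hours
long offset = new java.util.Random().nextInt((int) (scanPeriodMsecs / 1000L)) * 1000L;
long firstScanTime = System.currentTimeMillis() + offset;    // somewhere inside the first period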
DirectoryScanner.run
The master thread calls the run method of DirectoryScanner periodically, and each invocation runs reconcile(). Exceptions are logged and swallowed so the next cycle still runs; only an Error terminates the periodic scanner.
/**
* Main program loop for DirectoryScanner
* Runs "reconcile()" periodically under the masterThread.
*/
@Override
public void run() {
try {
if (!shouldRun) {
//shutdown has been activated
LOG.warn("this cycle terminating immediately because 'shouldRun' has been deactivated");
return;
}
//We're are okay to run - do it
reconcile();
} catch (Exception e) {
//Log and continue - allows Executor to run again next cycle
LOG.error("Exception during DirectoryScanner execution - will continue next cycle", e);
} catch (Error er) {
//Non-recoverable error - re-throw after logging the problem
LOG.error("System Error during DirectoryScanner execution - permanently terminating periodic scanner", er);
throw er;
}
}
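The reason run() catches and logs Exception rather than letting it propagate is a property of ScheduledThreadPoolExecutor: once a scheduled task lets an exception escape, its subsequent executions are suppressed. A minimal standalone demonstration of that behavior (plain JDK code, not part of Hadoop):
ScheduledExecutorService ses = Executors.newScheduledThreadPool(1);
ses.scheduleAtFixedRate(() -> {
  System.out.println("tick");
  throw new RuntimeException("boom");   // after this escapes, the task is never run again
}, 0, 1, TimeUnit.SECONDS);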
reconcile
Reconciles the differences between on-disk and in-memory blocks.
First, it calls scan(), which records the differences in diffs.
Then, for each ScanInfo in diffs, it calls dataset.checkAndUpdate so the dataset can fix up its in-memory state for that block.
void reconcile() throws IOException {
scan();
for (Entry<String, LinkedList<ScanInfo>> entry : diffs.entrySet()) {
String bpid = entry.getKey();
LinkedList<ScanInfo> diff = entry.getValue();
for (ScanInfo info : diff) {
dataset.checkAndUpdate(bpid, info.getBlockId(), info.getBlockFile(),
info.getMetaFile(), info.getVolume());
}
}
if (!retainDiffs) clear();
}
DirectoryScanner.scan
Scans for the differences between on-disk and in-memory blocks.
Only the "finalized blocks" lists of both disk and memory are scanned.
void scan() {
clear();
Map<String, ScanInfo[]> diskReport = getDiskReport();
// Hold FSDataset lock to prevent further changes to the block map
synchronized(dataset) {
// compare and set diffs
...
} //end synchronized
}
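The elided part is the actual comparison: conceptually it is a merge of two blockId-sorted lists, the diskReport built above and the finalized replicas the dataset holds in memory (dataset.getFinalizedBlocks(bpid)), recording anything that exists on only one side (the real code also compares block files, meta files and lengths, and collects statistics). A simplified illustration with a hypothetical addDifference helper, not the actual Hadoop code:
for (Entry<String, ScanInfo[]> entry : diskReport.entrySet()) {
  String bpid = entry.getKey();
  ScanInfo[] onDisk = entry.getValue();                       // sorted by blockId
  List<FinalizedReplica> inMemory = dataset.getFinalizedBlocks(bpid); // assume sorted by blockId
  int d = 0, m = 0;
  while (d < onDisk.length && m < inMemory.size()) {
    long diskId = onDisk[d].getBlockId();
    long memId = inMemory.get(m).getBlockId();
    if (diskId == memId) {
      d++; m++;                                               // known on both sides
    } else if (diskId < memId) {
      addDifference(bpid, onDisk[d++]);                       // on disk only (hypothetical helper)
    } else {
      // in memory only: record inMemory.get(m) as missing on disk
      m++;
    }
  }
  // whatever remains in either list after the loop is also a difference
}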
getDiskReport
Gets the lists of blocks on disk, sorted by blockId, per block pool.
private Map<String, ScanInfo[]> getDiskReport() {
// First get list of data directories
final List<? extends FsVolumeSpi> volumes = dataset.getVolumes();
// Use an array since the threads may return out of order and
// compilersInProgress#keySet may return out of order as well.
ScanInfoPerBlockPool[] dirReports = new ScanInfoPerBlockPool[volumes.size()];
Map<Integer, Future<ScanInfoPerBlockPool>> compilersInProgress =
new HashMap<Integer, Future<ScanInfoPerBlockPool>>();
for (int i = 0; i < volumes.size(); i++) {
if (isValid(dataset, volumes.get(i))) {
ReportCompiler reportCompiler =
new ReportCompiler(datanode,volumes.get(i));
Future<ScanInfoPerBlockPool> result =
reportCompileThreadPool.submit(reportCompiler);
compilersInProgress.put(i, result);
}
}
for (Entry<Integer, Future<ScanInfoPerBlockPool>> report :
compilersInProgress.entrySet()) {
try {
dirReports[report.getKey()] = report.getValue().get();
} catch (Exception ex) {
LOG.error("Error compiling report", ex);
// Propagate ex to DataBlockScanner to deal with
throw new RuntimeException(ex);
}
}
// Compile consolidated report for all the volumes
ScanInfoPerBlockPool list = new ScanInfoPerBlockPool();
for (int i = 0; i < volumes.size(); i++) {
if (isValid(dataset, volumes.get(i))) {
// volume is still valid
list.addAll(dirReports[i]);
}
}
return list.toSortedArrays();
}
ReportCompiler
Although a ReportCompiler object is created for each volume, reportCompileThreadPool has only one thread by default, so the volumes end up being scanned one after another.
private static class ReportCompiler
implements Callable<ScanInfoPerBlockPool> {
private final FsVolumeSpi volume;
private final DataNode datanode;
public ReportCompiler(DataNode datanode, FsVolumeSpi volume) {
this.datanode = datanode;
this.volume = volume;
}
@Override
public ScanInfoPerBlockPool call() throws Exception {
String[] bpList = volume.getBlockPoolList();
ScanInfoPerBlockPool result = new ScanInfoPerBlockPool(bpList.length);
for (String bpid : bpList) {
LinkedList<ScanInfo> report = new LinkedList<ScanInfo>();
File bpFinalizedDir = volume.getFinalizedDir(bpid);
result.put(bpid,
compileReport(volume, bpFinalizedDir, bpFinalizedDir, report));
}
return result;
}
/** Compile list {@link ScanInfo} for the blocks in the directory <dir> */
private LinkedList<ScanInfo> compileReport(FsVolumeSpi vol,
File bpFinalizedDir, File dir, LinkedList<ScanInfo> report) {
File[] files;
try {
files = FileUtil.listFiles(dir);
} catch (IOException ioe) {
LOG.warn("Exception occured while compiling report: ", ioe);
// Initiate a check on disk failure.
datanode.checkDiskErrorAsync();
// Ignore this directory and proceed.
return report;
}
Arrays.sort(files);
/*
* Assumption: In the sorted list of files block file appears immediately
* before block metadata file. This is true for the current naming
* convention for block file blk_<blockid> and meta file
* blk_<blockid>_<genstamp>.meta
*/
for (int i = 0; i < files.length; i++) {
if (files[i].isDirectory()) {
compileReport(vol, bpFinalizedDir, files[i], report);
continue;
}
if (!Block.isBlockFilename(files[i])) {
if (isBlockMetaFile(Block.BLOCK_FILE_PREFIX, files[i].getName())) {
long blockId = Block.getBlockId(files[i].getName());
verifyFileLocation(files[i].getParentFile(), bpFinalizedDir,
blockId);
report.add(new ScanInfo(blockId, null, files[i], vol));
}
continue;
}
File blockFile = files[i];
long blockId = Block.filename2id(blockFile.getName());
File metaFile = null;
// Skip all the files that start with block name until
// getting to the metafile for the block
while (i + 1 < files.length && files[i + 1].isFile()
&& files[i + 1].getName().startsWith(blockFile.getName())) {
i++;
if (isBlockMetaFile(blockFile.getName(), files[i].getName())) {
metaFile = files[i];
break;
}
}
verifyFileLocation(blockFile.getParentFile(), bpFinalizedDir,
blockId);
report.add(new ScanInfo(blockId, blockFile, metaFile, vol));
}
return report;
}
/**
* Verify whether the actual directory location of block file has the
* expected directory path computed using its block ID.
*/
private void verifyFileLocation(File actualBlockDir,
File bpFinalizedDir, long blockId) {
File blockDir = DatanodeUtil.idToBlockDir(bpFinalizedDir, blockId);
if (actualBlockDir.compareTo(blockDir) != 0) {
LOG.warn("Block: " + blockId
+ " has to be upgraded to block ID-based layout");
}
}
}
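The "meta file appears immediately after the block file" assumption in compileReport comes from the block naming convention: a sorted listing of a finalized directory interleaves each blk_<blockid> file with its blk_<blockid>_<genstamp>.meta file, so one pass over the sorted array can pair them up. An illustrative listing (made-up block IDs and generation stamps):
blk_1073741825
blk_1073741825_1001.meta
blk_1073741826
blk_1073741826_1002.meta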
ScanInfoPerBlockPool
static class ScanInfoPerBlockPool extends
HashMap<String, LinkedList<ScanInfo>> {
private static final long serialVersionUID = 1L;
ScanInfoPerBlockPool() {super();}
ScanInfoPerBlockPool(int sz) {super(sz);}
/**
* Merges {@code that} ScanInfoPerBlockPool into this one
*/
public void addAll(ScanInfoPerBlockPool that) {
if (that == null) return;
for (Entry<String, LinkedList<ScanInfo>> entry : that.entrySet()) {
String bpid = entry.getKey();
LinkedList<ScanInfo> list = entry.getValue();
if (this.containsKey(bpid)) {
//merge that per-bpid linked list with this one
this.get(bpid).addAll(list);
} else {
//add that new bpid and its linked list to this
this.put(bpid, list);
}
}
}
/**
* Convert all the LinkedList values in this ScanInfoPerBlockPool map
* into sorted arrays, and return a new map of these arrays per blockpool
* @return a map of ScanInfo arrays per blockpool
*/
public Map<String, ScanInfo[]> toSortedArrays() {
Map<String, ScanInfo[]> result =
new HashMap<String, ScanInfo[]>(this.size());
for (Entry<String, LinkedList<ScanInfo>> entry : this.entrySet()) {
String bpid = entry.getKey();
LinkedList<ScanInfo> list = entry.getValue();
// convert list to array
ScanInfo[] record = list.toArray(new ScanInfo[list.size()]);
// Sort array based on blockId
Arrays.sort(record);
result.put(bpid, record);
}
return result;
}
}
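ScanInfoPerBlockPool is simply a HashMap from block pool ID to the list of ScanInfo entries collected for that pool. Its use mirrors the tail end of getDiskReport above; roughly (a sketch):
ScanInfoPerBlockPool merged = new ScanInfoPerBlockPool();
for (ScanInfoPerBlockPool perVolume : dirReports) {
  merged.addAll(perVolume);                                  // concatenate each pool's per-volume lists (null-safe)
}
Map<String, ScanInfo[]> byPool = merged.toSortedArrays();    // blockId-sorted ScanInfo[] per block pool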