Default value: reportCompileThreadPool -> 1 thread (DFS_DATANODE_DIRECTORYSCAN_THREADS_DEFAULT)
DirectoryScanner(DataNode datanode, FsDatasetSpi<?> dataset, Configuration conf) {
this.datanode = datanode;
this.dataset = dataset;
int interval = conf.getInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY,
DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_DEFAULT);
scanPeriodMsecs = interval * 1000L; //msec
int threads =
conf.getInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_KEY,
DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_DEFAULT);
reportCompileThreadPool = Executors.newFixedThreadPool(threads,
new Daemon.DaemonFactory());
masterThread = new ScheduledThreadPoolExecutor(1,
new Daemon.DaemonFactory());
}
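For reference, the two settings read by the constructor are dfs.datanode.directoryscan.interval (default 21600 seconds, i.e. six hours between scans) and dfs.datanode.directoryscan.threads (default 1 report-compiler thread). A minimal sketch of overriding them programmatically with HdfsConfiguration and the DFSConfigKeys constants used above (in practice they are normally set in hdfs-site.xml):
Configuration conf = new HdfsConfiguration();
// scan every hour instead of the six-hour default
conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 3600);
// compile per-volume reports with two threads instead of one
conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_KEY, 2);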
DirectoryScanner.start starts the masterThread: it schedules this scanner at a fixed rate, delayed by a random offset so the first scan falls somewhere within the first period.
void start() {
shouldRun = true;
long offset = DFSUtil.getRandom().nextInt((int) (scanPeriodMsecs/1000L)) * 1000L; //msec
long firstScanTime = Time.now() + offset;
LOG.info("Periodic Directory Tree Verification scan starting at "
+ firstScanTime + " with interval " + scanPeriodMsecs);
masterThread.scheduleAtFixedRate(this, offset, scanPeriodMsecs,
TimeUnit.MILLISECONDS);
}
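As a worked example of the scheduling (a sketch, with java.util.Random and System.currentTimeMillis standing in for Hadoop's DFSUtil.getRandom() and Time.now()): with the default six-hour period the first scan is delayed by a random whole number of seconds in [0, 21600), so DataNodes that start at the same time do not all hit their disks at once.
long scanPeriodMsecs = 21600 * 1000L;                        // default interval: six hours
long offset = new java.util.Random().nextInt((int) (scanPeriodMsecs / 1000L)) * 1000L;
long firstScanTime = System.currentTimeMillis() + offset;    // somewhere inside the first period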
DirectoryScanner.run
The master thread calls the run method of DirectoryScanner periodically, and each invocation runs reconcile(). Exceptions are logged and swallowed so the next cycle still runs; only an Error terminates the periodic scanner.
/**
* Main program loop for DirectoryScanner
* Runs "reconcile()" periodically under the masterThread.
*/
@Override
public void run() {
try {
if (!shouldRun) {
//shutdown has been activated
LOG.warn("this cycle terminating immediately because 'shouldRun' has been deactivated");
return;
}
//We're are okay to run - do it
reconcile();
} catch (Exception e) {
//Log and continue - allows Executor to run again next cycle
LOG.error("Exception during DirectoryScanner execution - will continue next cycle", e);
} catch (Error er) {
//Non-recoverable error - re-throw after logging the problem
LOG.error("System Error during DirectoryScanner execution - permanently terminating periodic scanner", er);
throw er;
}
}
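The reason run() catches and logs Exception rather than letting it propagate is a property of ScheduledThreadPoolExecutor: once a scheduled task lets an exception escape, its subsequent executions are suppressed. A minimal standalone demonstration of that behavior (plain JDK code, not part of Hadoop):
ScheduledExecutorService ses = Executors.newScheduledThreadPool(1);
ses.scheduleAtFixedRate(() -> {
  System.out.println("tick");
  throw new RuntimeException("boom");   // after this escapes, the task is never run again
}, 0, 1, TimeUnit.SECONDS);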
reconcile
Reconciles the differences between on-disk and in-memory blocks.
First, it calls scan(), which records the differences in diffs.
Then, for each ScanInfo in diffs, it calls dataset.checkAndUpdate so the dataset can fix up its in-memory state for that block.
void reconcile() throws IOException {
scan();
for (Entry<String, LinkedList<ScanInfo>> entry : diffs.entrySet()) {
String bpid = entry.getKey();
LinkedList<ScanInfo> diff = entry.getValue();
for (ScanInfo info : diff) {
dataset.checkAndUpdate(bpid, info.getBlockId(), info.getBlockFile(),
info.getMetaFile(), info.getVolume());
}
}
if (!retainDiffs) clear();
}
DirectoryScanner.scan
Scans for the differences between on-disk and in-memory blocks.
Only the "finalized blocks" lists of both disk and memory are scanned.
void scan() {
clear();
Map<String, ScanInfo[]> diskReport = getDiskReport();
// Hold FSDataset lock to prevent further changes to the block map
synchronized(dataset) {
// compare and set diffs
...
} //end synchronized
}
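The elided part is the actual comparison: conceptually it is a merge of two blockId-sorted lists, the diskReport built above and the finalized replicas the dataset holds in memory (dataset.getFinalizedBlocks(bpid)), recording anything that exists on only one side (the real code also compares block files, meta files and lengths, and collects statistics). A simplified illustration with a hypothetical addDifference helper, not the actual Hadoop code:
for (Entry<String, ScanInfo[]> entry : diskReport.entrySet()) {
  String bpid = entry.getKey();
  ScanInfo[] onDisk = entry.getValue();                       // sorted by blockId
  List<FinalizedReplica> inMemory = dataset.getFinalizedBlocks(bpid); // assume sorted by blockId
  int d = 0, m = 0;
  while (d < onDisk.length && m < inMemory.size()) {
    long diskId = onDisk[d].getBlockId();
    long memId = inMemory.get(m).getBlockId();
    if (diskId == memId) {
      d++; m++;                                               // known on both sides
    } else if (diskId < memId) {
      addDifference(bpid, onDisk[d++]);                       // on disk only (hypothetical helper)
    } else {
      // in memory only: record inMemory.get(m) as missing on disk
      m++;
    }
  }
  // whatever remains in either list after the loop is also a difference
}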
getDiskReport
Gets the lists of blocks on disk, sorted by blockId, per block pool.
private Map<String, ScanInfo[]> getDiskReport() {
// First get list of data directories
final List<? extends FsVolumeSpi> volumes = dataset.getVolumes();
// Use an array since the threads may return out of order and
// compilersInProgress#keySet may return out of order as well.
ScanInfoPerBlockPool[] dirReports = new ScanInfoPerBlockPool[volumes.size()];
Map<Integer, Future<ScanInfoPerBlockPool>> compilersInProgress =
new HashMap<Integer, Future<ScanInfoPerBlockPool>>();
for (int i = 0; i < volumes.size(); i++) {
if (isValid(dataset, volumes.get(i))) {
ReportCompiler reportCompiler =
new ReportCompiler(datanode,volumes.get(i));
Future<ScanInfoPerBlockPool> result =
reportCompileThreadPool.submit(reportCompiler);
compilersInProgress.put(i, result);
}
}
for (Entry<Integer, Future<ScanInfoPerBlockPool>> report :
compilersInProgress.entrySet()) {
try {
dirReports[report.getKey()] = report.getValue().get();
} catch (Exception ex) {
LOG.error("Error compiling report", ex);
// Propagate ex to DataBlockScanner to deal with
throw new RuntimeException(ex);
}
}
// Compile consolidated report for all the volumes
ScanInfoPerBlockPool list = new ScanInfoPerBlockPool();
for (int i = 0; i < volumes.size(); i++) {
if (isValid(dataset, volumes.get(i))) {
// volume is still valid
list.addAll(dirReports[i]);
}
}
return list.toSortedArrays();
}
ReportCompiler
Although a ReportCompiler object is created for each volume, reportCompileThreadPool has only one thread by default, so the volumes end up being scanned one after another.
private static class ReportCompiler
implements Callable<ScanInfoPerBlockPool> {
private final FsVolumeSpi volume;
private final DataNode datanode;
public ReportCompiler(DataNode datanode, FsVolumeSpi volume) {
this.datanode = datanode;
this.volume = volume;
}
@Override
public ScanInfoPerBlockPool call() throws Exception {
String[] bpList = volume.getBlockPoolList();
ScanInfoPerBlockPool result = new ScanInfoPerBlockPool(bpList.length);
for (String bpid : bpList) {
LinkedList<ScanInfo> report = new LinkedList<ScanInfo>();
File bpFinalizedDir = volume.getFinalizedDir(bpid);
result.put(bpid,
compileReport(volume, bpFinalizedDir, bpFinalizedDir, report));
}
return result;
}
/** Compile list {@link ScanInfo} for the blocks in the directory <dir> */
private LinkedList<ScanInfo> compileReport(FsVolumeSpi vol,
File bpFinalizedDir, File dir, LinkedList<ScanInfo> report) {
File[] files;
try {
files = FileUtil.listFiles(dir);
} catch (IOException ioe) {
LOG.warn("Exception occured while compiling report: ", ioe);
// Initiate a check on disk failure.
datanode.checkDiskErrorAsync();
// Ignore this directory and proceed.
return report;
}
Arrays.sort(files);
/*
* Assumption: In the sorted list of files block file appears immediately
* before block metadata file. This is true for the current naming
* convention for block file blk_<blockid> and meta file
* blk_<blockid>_<genstamp>.meta
*/
for (int i = 0; i < files.length; i++) {
if (files[i].isDirectory()) {
compileReport(vol, bpFinalizedDir, files[i], report);
continue;
}
if (!Block.isBlockFilename(files[i])) {
if (isBlockMetaFile(Block.BLOCK_FILE_PREFIX, files[i].getName())) {
long blockId = Block.getBlockId(files[i].getName());
verifyFileLocation(files[i].getParentFile(), bpFinalizedDir,
blockId);
report.add(new ScanInfo(blockId, null, files[i], vol));
}
continue;
}
File blockFile = files[i];
long blockId = Block.filename2id(blockFile.getName());
File metaFile = null;
// Skip all the files that start with block name until
// getting to the metafile for the block
while (i + 1 < files.length && files[i + 1].isFile()
&& files[i + 1].getName().startsWith(blockFile.getName())) {
i++;
if (isBlockMetaFile(blockFile.getName(), files[i].getName())) {
metaFile = files[i];
break;
}
}
verifyFileLocation(blockFile.getParentFile(), bpFinalizedDir,
blockId);
report.add(new ScanInfo(blockId, blockFile, metaFile, vol));
}
return report;
}
/**
* Verify whether the actual directory location of block file has the
* expected directory path computed using its block ID.
*/
private void verifyFileLocation(File actualBlockDir,
File bpFinalizedDir, long blockId) {
File blockDir = DatanodeUtil.idToBlockDir(bpFinalizedDir, blockId);
if (actualBlockDir.compareTo(blockDir) != 0) {
LOG.warn("Block: " + blockId
+ " has to be upgraded to block ID-based layout");
}
}
}
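The "meta file appears immediately after the block file" assumption in compileReport comes from the block naming convention: a sorted listing of a finalized directory interleaves each blk_<blockid> file with its blk_<blockid>_<genstamp>.meta file, so one pass over the sorted array can pair them up. An illustrative listing (made-up block IDs and generation stamps):
blk_1073741825
blk_1073741825_1001.meta
blk_1073741826
blk_1073741826_1002.meta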
ScanInfoPerBlockPool
static class ScanInfoPerBlockPool extends
HashMap<String, LinkedList<ScanInfo>> {
private static final long serialVersionUID = 1L;
ScanInfoPerBlockPool() {super();}
ScanInfoPerBlockPool(int sz) {super(sz);}
/**
* Merges {@code that} ScanInfoPerBlockPool into this one
*/
public void addAll(ScanInfoPerBlockPool that) {
if (that == null) return;
for (Entry<String, LinkedList<ScanInfo>> entry : that.entrySet()) {
String bpid = entry.getKey();
LinkedList<ScanInfo> list = entry.getValue();
if (this.containsKey(bpid)) {
//merge that per-bpid linked list with this one
this.get(bpid).addAll(list);
} else {
//add that new bpid and its linked list to this
this.put(bpid, list);
}
}
}
/**
* Convert all the LinkedList values in this ScanInfoPerBlockPool map
* into sorted arrays, and return a new map of these arrays per blockpool
* @return a map of ScanInfo arrays per blockpool
*/
public Map<String, ScanInfo[]> toSortedArrays() {
Map<String, ScanInfo[]> result =
new HashMap<String, ScanInfo[]>(this.size());
for (Entry<String, LinkedList<ScanInfo>> entry : this.entrySet()) {
String bpid = entry.getKey();
LinkedList<ScanInfo> list = entry.getValue();
// convert list to array
ScanInfo[] record = list.toArray(new ScanInfo[list.size()]);
// Sort array based on blockId
Arrays.sort(record);
result.put(bpid, record);
}
return result;
}
}
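ScanInfoPerBlockPool is simply a HashMap from block pool ID to the list of ScanInfo entries collected for that pool. Its use mirrors the tail end of getDiskReport above; roughly (a sketch):
ScanInfoPerBlockPool merged = new ScanInfoPerBlockPool();
for (ScanInfoPerBlockPool perVolume : dirReports) {
  merged.addAll(perVolume);                                  // concatenate each pool's per-volume lists (null-safe)
}
Map<String, ScanInfo[]> byPool = merged.toSortedArrays();    // blockId-sorted ScanInfo[] per block pool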