HDFS Source Code Analysis --- TestDFSIO


        There is not much to say about how to use this tool, so here is a brief look at how it works under the hood.

MapReduce

        TestDFSIO launches a MapReduce job whose map tasks run on the YARN nodes and act as HDFS clients, issuing the read/write requests. The run method in TestDFSIO.java drives the MapReduce job.

        

@Override // Tool
  public int run(String[] args) throws IOException {
    TestType testType = null;
    int bufferSize = DEFAULT_BUFFER_SIZE;
    long nrBytes = 1*MEGA;
    int nrFiles = 1;
    long skipSize = 0;
    String resFileName = DEFAULT_RES_FILE_NAME;
    String compressionClass = null;
    boolean isSequential = false;
    String version = TestDFSIO.class.getSimpleName() + ".1.7";

    LOG.info(version);
    if (args.length == 0) {
      System.err.println("Missing arguments.");
      return -1;
    }

    for (int i = 0; i < args.length; i++) {       // parse command line
      if (args[i].startsWith("-read")) {
        testType = TestType.TEST_TYPE_READ;
      } else if (args[i].equals("-write")) {
        testType = TestType.TEST_TYPE_WRITE;
      } else if (args[i].equals("-append")) {
        testType = TestType.TEST_TYPE_APPEND;
      } else if (args[i].equals("-random")) {
        if(testType != TestType.TEST_TYPE_READ) return -1;
        testType = TestType.TEST_TYPE_READ_RANDOM;
      } else if (args[i].equals("-backward")) {
        if(testType != TestType.TEST_TYPE_READ) return -1;
        testType = TestType.TEST_TYPE_READ_BACKWARD;
      } else if (args[i].equals("-skip")) {
        if(testType != TestType.TEST_TYPE_READ) return -1;
        testType = TestType.TEST_TYPE_READ_SKIP;
      } else if (args[i].equals("-clean")) {
        testType = TestType.TEST_TYPE_CLEANUP;
      } else if (args[i].startsWith("-seq")) {
        isSequential = true;
      } else if (args[i].startsWith("-compression")) {
        compressionClass = args[++i];
      } else if (args[i].equals("-nrFiles")) {
        nrFiles = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-fileSize") || args[i].equals("-size")) {
        nrBytes = parseSize(args[++i]);
      } else if (args[i].equals("-skipSize")) {
        skipSize = parseSize(args[++i]);
      } else if (args[i].equals("-bufferSize")) {
        bufferSize = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-resFile")) {
        resFileName = args[++i];
      } else {
        System.err.println("Illegal argument: " + args[i]);
        return -1;
      }
    }
    if(testType == null)
      return -1;
    if(testType == TestType.TEST_TYPE_READ_BACKWARD)
      skipSize = -bufferSize;
    else if(testType == TestType.TEST_TYPE_READ_SKIP && skipSize == 0)
      skipSize = bufferSize;

    LOG.info("nrFiles = " + nrFiles);
    LOG.info("nrBytes (MB) = " + toMB(nrBytes));
    LOG.info("bufferSize = " + bufferSize);
    if(skipSize > 0)
      LOG.info("skipSize = " + skipSize);
    LOG.info("baseDir = " + getBaseDir(config));
    
    if(compressionClass != null) {
      config.set("test.io.compression.class", compressionClass);
      LOG.info("compressionClass = " + compressionClass);
    }

    config.setInt("test.io.file.buffer.size", bufferSize);
    config.setLong("test.io.skip.size", skipSize);
    config.setBoolean(DFSConfigKeys.DFS_SUPPORT_APPEND_KEY, true);
    FileSystem fs = FileSystem.get(config);

    if (isSequential) {
      long tStart = System.currentTimeMillis();
      sequentialTest(fs, testType, nrBytes, nrFiles);
      long execTime = System.currentTimeMillis() - tStart;
      String resultLine = "Seq Test exec time sec: " + (float)execTime / 1000;
      LOG.info(resultLine);
      return 0;
    }
    if (testType == TestType.TEST_TYPE_CLEANUP) {
      cleanup(fs);
      return 0;
    }
    createControlFile(fs, nrBytes, nrFiles);
    long tStart = System.currentTimeMillis();
    switch(testType) {
    case TEST_TYPE_WRITE:
      writeTest(fs);
      break;
    case TEST_TYPE_READ:
      readTest(fs);
      break;
    case TEST_TYPE_APPEND:
      appendTest(fs);
      break;
    case TEST_TYPE_READ_RANDOM:
    case TEST_TYPE_READ_BACKWARD:
    case TEST_TYPE_READ_SKIP:
      randomReadTest(fs);
    }
    long execTime = System.currentTimeMillis() - tStart;
  
    analyzeResult(fs, testType, execTime, resFileName);
    return 0;
  }

        1. Parse the command-line arguments.

        2. Based on the test type, decide which Mapper to run (WriteMapper, ReadMapper, or AppendMapper); the chosen Mapper is submitted through runIOTest.

 private void runIOTest(
          Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass, 
          Path outputDir) throws IOException {
    JobConf job = new JobConf(config, TestDFSIO.class);

    FileInputFormat.setInputPaths(job, getControlDir(config));
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(mapperClass);
    job.setReducerClass(AccumulatingReducer.class);

    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    JobClient.runJob(job);
  }
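
        Taking the write test as an example, the dispatch is just a thin wrapper that cleans up output from earlier runs and hands WriteMapper to runIOTest. A rough sketch of writeTest, following the same source tree (getDataDir and getWriteDir are TestDFSIO helpers that resolve subdirectories under the benchmark base dir):

  private void writeTest(FileSystem fs) throws IOException {
    Path writeDir = getWriteDir(config);
    fs.delete(getDataDir(config), true);   // drop data files from a previous run
    fs.delete(writeDir, true);             // drop old write results
    runIOTest(WriteMapper.class, writeDir);
  }

        readTest and appendTest follow the same pattern, plugging ReadMapper and AppendMapper into runIOTest and writing their results to their own output directories.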

 

ControlFile

        Here is the question: a MapReduce job has to know what each map task should do. You can think of it abstractly as MapReduce always reading an input file laid out line by line, where each line carries the information one map task needs to execute.

@SuppressWarnings("deprecation")
  private void createControlFile(FileSystem fs,
                                  long nrBytes, // in bytes
                                  int nrFiles
                                ) throws IOException {
    LOG.info("creating control file: "+nrBytes+" bytes, "+nrFiles+" files");

    Path controlDir = getControlDir(config);
    fs.delete(controlDir, true);

    for(int i=0; i < nrFiles; i++) {
      String name = getFileName(i);
      Path controlFile = new Path(controlDir, "in_file_" + name);
      SequenceFile.Writer writer = null;
      try {
        writer = SequenceFile.createWriter(fs, config, controlFile,
                                           Text.class, LongWritable.class,
                                           CompressionType.NONE);
        writer.append(new Text(name), new LongWritable(nrBytes));
      } catch(Exception e) {
        throw new IOException(e.getLocalizedMessage());
      } finally {
        if (writer != null)
          writer.close();
        writer = null;
      }
    }
    LOG.info("created control files for: "+nrFiles+" files");
  }

        As you can see, this code writes control files to HDFS before the MapReduce job runs, and these files drive the job. Each control file holds a single record containing a file name and a file size in bytes.

writer.append(new Text(name), new LongWritable(nrBytes));
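
        If you want to see what a control file actually contains, you can dump it with a SequenceFile.Reader. This is a minimal sketch, not part of TestDFSIO; the path below is an assumption based on the default base dir /benchmarks/TestDFSIO and the io_control subdirectory, so adjust it to your cluster:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class DumpControlFile {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // assumed location of one control file; adjust to your base dir
    Path controlFile = new Path("/benchmarks/TestDFSIO/io_control/in_file_test_io_0");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, controlFile, conf);
    try {
      Text fileName = new Text();
      LongWritable nrBytes = new LongWritable();
      while (reader.next(fileName, nrBytes)) {   // one record per map task
        System.out.println(fileName + " -> " + nrBytes.get() + " bytes");
      }
    } finally {
      reader.close();
    }
  }
}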

So the map function runs once for each control-file record. Taking the write test as an example:

public void map(Text key, 
                  LongWritable value,
                  OutputCollector<Text, Text> output, 
                  Reporter reporter) throws IOException {
    String name = key.toString();
    long longValue = value.get();
    
    reporter.setStatus("starting " + name + " ::host = " + hostName);

    this.stream = getIOStream(name);
    T statValue = null;
    long tStart = System.currentTimeMillis();
    try {
      statValue = doIO(reporter, name, longValue);
    } finally {
      if(stream != null) stream.close();
    }
    long tEnd = System.currentTimeMillis();
    long execTime = tEnd - tStart;
    collectStats(output, name, execTime, statValue);
    
    reporter.setStatus("finished " + name + " ::host = " + hostName);
  }

The key is the file name, so the map task knows which file to create when writing.

The value can be understood as the size of the file to write.

        With these, getIOStream can create the file from name, and doIO knows from longValue how many bytes to write in total; since the data is written one buffer at a time, it can work out how many buffer-sized writes are needed.
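
        For reference, the buffer loop in WriteMapper.doIO looks roughly like the sketch below (buffer, bufferSize, and hostName are fields of the mapper, with bufferSize taken from test.io.file.buffer.size):

  @Override // IOMapperBase
  public Long doIO(Reporter reporter, String name, long totalSize) throws IOException {
    OutputStream out = (OutputStream) this.stream;
    // write the file in bufferSize chunks until totalSize bytes have been written
    for (long nrRemaining = totalSize; nrRemaining > 0; nrRemaining -= bufferSize) {
      int curSize = (bufferSize < nrRemaining) ? bufferSize : (int) nrRemaining;
      out.write(buffer, 0, curSize);
      reporter.setStatus("writing " + name + "@"
          + (totalSize - nrRemaining) + "/" + totalSize
          + " ::host = " + hostName);
    }
    return Long.valueOf(totalSize);
  }

        The returned value (the total bytes written) is what collectStats, together with the measured execution time, turns into throughput figures, which AccumulatingReducer then aggregates across all map tasks before analyzeResult produces the final report.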
