Map数量的修改-computeSplitSize

Class JobSubmitter

submitJobInternal(Job job, Cluster cluster)

int maps = writeSplits(job,submitJobDir)

  private int writeSplits(org.apache.hadoop.mapreduce.JobContextjob,

      Path jobSubmitDir) throws IOException,

      InterruptedException, ClassNotFoundException{

    JobConf jConf =(JobConf)job.getConfiguration();

    int maps;

    if (jConf.getUseNewMapper()) {

      maps = writeNewSplits(job, jobSubmitDir);

    } else {

      maps = writeOldSplits(jConf,jobSubmitDir);

    }

    return maps;

  }

 

  private <T extends InputSplit>

  int writeNewSplits(JobContext job, Path jobSubmitDir) throws IOException,

      InterruptedException,ClassNotFoundException {

    Configuration conf =job.getConfiguration();

    InputFormat<?, ?> input =

      ReflectionUtils.newInstance(job.getInputFormatClass(),conf);

 

    List<InputSplit> splits = input.getSplits(job);

    T[] array = (T[]) splits.toArray(new InputSplit[splits.size()]);

 

    // sort the splits into order based on size, so that thebiggest

    // go first

    Arrays.sort(array, new SplitComparator());

    JobSplitWriter.createSplitFiles(jobSubmitDir,conf,

        jobSubmitDir.getFileSystem(conf),array);

    return array.length;

  }

 

  public List<InputSplit> getSplits(JobContext job) throws IOException {

Stopwatch sw = new Stopwatch().start();

//protected long getFormatMinSplitSize() { return 1; }

//public static long getMinSplitSize (JobContextjob) { return .getConfiguration().getLong //(SPLIT_MINSIZE, 1L);}

// public static long getMaxSplitSize(JobContext context) { return //context.getConfiguration().getLong(SPLIT_MAXSIZE, Long.MAX_VALUE);}

    long minSize = Math.max(getFormatMinSplitSize(),getMinSplitSize(job));//1

    long maxSize = getMaxSplitSize(job);// Long.MAX_VALUE

 

    // generate splits

List<InputSplit> splits = new ArrayList<InputSplit>();

//listStatus列出输入目录,其中FileStatus是文件的客户端信息

   List<FileStatus> files = listStatus(job);//             

    for (FileStatus file: files) {

      Path path = file.getPath();

      long length = file.getLen();

      if (length != 0) {

        BlockLocation[] blkLocations;  //表示一个块的网络位置,包含块复制的主机的信息和其//他块元数据(比如与块相关联的文件偏移量、文件大小,是否是坏文件等)

        if (file instanceof LocatedFileStatus) {

          blkLocations = ((LocatedFileStatus)file).getBlockLocations();

        } else {

          FileSystem fs =path.getFileSystem(job.getConfiguration());

          blkLocations =fs.getFileBlockLocations(file, 0, length);

        }

//protected boolean isSplitable(JobContextcontext, Path filename) {return true;}

        if (isSplitable(job, path)) {

          long blockSize = file.getBlockSize();

// protected long computeSplitSize(long blockSize, long minSize,long maxSize) {

//   return Math.max(minSize, Math.min(maxSize,blockSize)); }

          long splitSize = computeSplitSize(blockSize,minSize, maxSize);

 

          long bytesRemaining = length;

          while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {

            int blkIndex = getBlockIndex(blkLocations,length-bytesRemaining);

            splits.add(makeSplit(path, length-bytesRemaining,splitSize,

                                     blkLocations[blkIndex].getHosts()));

            bytesRemaining -= splitSize;

          }

 

          if (bytesRemaining != 0) {

            int blkIndex = getBlockIndex(blkLocations,length-bytesRemaining);

            splits.add(makeSplit(path, length-bytesRemaining,bytesRemaining,

                      blkLocations[blkIndex].getHosts()));

          }

        } else { // not splitable

          splits.add(makeSplit(path, 0, length,blkLocations[0].getHosts()));

        }

      } else {

        //Create empty hosts array forzero length files

        splits.add(makeSplit(path, 0, length, new String[0]));

      }

    }

    // Save the number of input files for metrics/loadgen

    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());

    sw.stop();

    if (LOG.isDebugEnabled()) {

      LOG.debug("Total # of splits generated bygetSplits: " + splits.size()

          + ", TimeTaken: " + sw.elapsedMillis());

    }

    return splits;

  }

 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值