An Overall Walkthrough of the MapReduce Source Code in Hadoop

Preface: this walkthrough is based on Hadoop 3.1.3 and follows the MapReduce flow through the source code. Detailed explanations are given only for the important parts; given my limited experience, some statements may not be entirely accurate, and corrections are welcome.
Enough talk, let's dig in!

Job submission flow:

1. waitForCompletion() submits the job: it enters the submit() method, and connect() determines whether the current job should run locally or on YARN.
//1. After waitForCompletion(), we land in Job.java, around source line 1587
if (state == JobState.DEFINE) { // make sure the job is still in the DEFINE state
      submit();
    }

//2. Step into submit()
public void submit()  // source line 1560
         throws IOException, InterruptedException, ClassNotFoundException {
    ensureState(JobState.DEFINE); // confirm again that the job is in the DEFINE state
    setUseNewAPI(); // use the new API
    connect(); 
    // Inside connect():
    //   return new Cluster(getConfiguration());
    //   initialization: initialize(jobTrackAddr, conf);  around line 109 of Cluster
    //   based on the configuration, either YarnClientProtocolProvider or LocalClientProtocolProvider is chosen,
    //   which determines whether the current job runs locally or on YARN.
    //   In local mode the result is: LocalClientProtocolProvider ==> LocalJobRunner
    
    final JobSubmitter submitter = 
        getJobSubmitter(cluster.getFileSystem(), cluster.getClient());
    status = ugi.doAs(new PrivilegedExceptionAction<JobStatus>() {
      public JobStatus run() throws IOException, InterruptedException, 
      ClassNotFoundException {
        return submitter.submitJobInternal(Job.this, cluster); // back in Job.java, submit the job, line 1570
      }
    });
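
For context, here is a minimal, hedged driver sketch that kicks off this whole flow; the class and argument names are illustrative, and WordCountMapper/WordCountReducer stand in for the user's own classes (sketched later in this article).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");   // state == JobState.DEFINE at this point
    job.setJarByClass(WordCountDriver.class);
    job.setMapperClass(WordCountMapper.class);       // the user's Mapper
    job.setReducerClass(WordCountReducer.class);     // the user's Reducer
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // must not already exist, see checkOutputSpecs below
    // waitForCompletion(true) -> submit() -> connect(), which picks local vs. YARN execution
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}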
    
2. return submitter.submitJobInternal(Job.this, cluster); submits the job
2.1 First, checkSpecs(job) (JobSubmitter, line 143) checks the job's output path.
// JobSubmitter, line 268
private void checkSpecs(Job job) throws ClassNotFoundException, 
      InterruptedException, IOException {
    JobConf jConf = (JobConf)job.getConfiguration();
    // Check the output specification
    if (jConf.getNumReduceTasks() == 0 ? 
        jConf.getUseNewMapper() : jConf.getUseNewReducer()) {
      org.apache.hadoop.mapreduce.OutputFormat<?, ?> output =
        ReflectionUtils.newInstance(job.getOutputFormatClass(),
          job.getConfiguration());
      output.checkOutputSpecs(job); // verify the output path
    } else {
      jConf.getOutputFormat().checkOutputSpecs(jtFs, jConf);
    }
  }

//Step into checkOutputSpecs(job), FileOutputFormat line 151
  public void checkOutputSpecs(JobContext job
                               ) throws FileAlreadyExistsException, IOException{
    // Ensure that the output directory is set and not already there
    Path outDir = getOutputPath(job);
    if (outDir == null) { // if the output path is not set, throw an error
      throw new InvalidJobConfException("Output directory not set.");
    }

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(),
        new Path[] { outDir }, job.getConfiguration());

    if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) { // if the path already exists, throw an error
      throw new FileAlreadyExistsException("Output directory " + outDir + 
                                           " already exists");
    }
  }
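
As a practical aside (not part of the Hadoop source), a driver will often delete a stale output directory before submitting, precisely to avoid the FileAlreadyExistsException above. A minimal sketch, assuming the output path is known up front:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OutputDirCleaner {
  // delete the output directory (recursively) if it already exists,
  // so that checkOutputSpecs() will not reject the job
  public static void cleanOutput(Configuration conf, Path outDir) throws java.io.IOException {
    FileSystem fs = outDir.getFileSystem(conf);
    if (fs.exists(outDir)) {
      fs.delete(outDir, true);
    }
  }
}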
2.2 Generate the temporary staging directory for job submission

Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, conf); //JobSubmitter, line 148

For example: file:/tmp/hadoop/mapred/staging/yong1727053970/.staging
2.3 Generate an ID for the current job
JobID jobId = submitClient.getNewJobID(); //JobSubmitter, line 157
2.4 Determine the job's submission path
Path submitJobDir = new Path(jobStagingArea, jobId.toString()); //JobSubmitter, line 159; not created yet
2.5 Create the job's submission path
copyAndConfigureFiles(job, submitJobDir); //source line 194
2.6 Generate the split information and return the number of splits (important, pay attention here!)
int maps = writeSplits(job, submitJobDir); //source line 200

//The actual splitting happens in getSplits() of FileInputFormat, line 389.
public List<InputSplit> getSplits(JobContext job) throws IOException {
    StopWatch sw = new StopWatch().start();
    //getFormatMinSplitSize() returns 1; getMinSplitSize(job) defaults to 0 and can be changed via configuration
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job)); // so minSize defaults to 1
    //defaults to Long.MAX_VALUE, i.e. very large
    long maxSize = getMaxSplitSize(job);
    
    if (isSplitable(job, path)) { // if the file is splittable, line 414
          long blockSize = file.getBlockSize(); // block size: 128 MB by default on a cluster, 33554432 B (32 MB) in local mode
          long splitSize = computeSplitSize(blockSize, minSize, maxSize); // compute the split size
    } 
    
    //With minSize defaulting to 1, maxSize very large, and blockSize 128 MB, this method returns 128 MB by default
    protected long computeSplitSize(long blockSize, long minSize, long maxSize) {
    	return Math.max(minSize, Math.min(maxSize, blockSize));
  	} // i.e., to enlarge the split size, raise minSize in the configuration; to shrink it, lower maxSize.
    
    //line 418
    long bytesRemaining = length; // bytes of the file left to split
    	// Splitting only continues while (remaining bytes / splitSize) > SPLIT_SLOP, where SPLIT_SLOP is 1.1.
    	// In other words: if, after several rounds of splitting, 129 MB of the file remains and the split size is
    	// 128 MB, the remainder is not split again but kept as one split. This avoids producing a tiny trailing split.
          while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
            int blkIndex = getBlockIndex(blkLocations, length-bytesRemaining);
            splits.add(makeSplit(path, length-bytesRemaining, splitSize,
                        blkLocations[blkIndex].getHosts(),
                        blkLocations[blkIndex].getCachedHosts()));
            bytesRemaining -= splitSize;
          }
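
To make the computeSplitSize() formula concrete, here is a hedged driver-side sketch (the values are illustrative) of how the two knobs can be nudged: raising minSize above the block size enlarges splits, while lowering maxSize below it shrinks them.

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class SplitSizeTuning {
  public static void apply(Job job) {
    // splitSize = Math.max(minSize, Math.min(maxSize, blockSize))
    // with a 128 MB block size, this forces ~256 MB splits (fewer MapTasks)
    FileInputFormat.setMinInputSplitSize(job, 256L * 1024 * 1024);
    // alternatively, to get ~64 MB splits (more MapTasks), cap maxSize instead:
    // FileInputFormat.setMaxInputSplitSize(job, 64L * 1024 * 1024);
  }
}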
2.7 Set the number of MapTasks from the number of splits
conf.setInt(MRJobConfig.NUM_MAPS, maps); //line 201
2.8 Write the current job's configuration to the submission path
writeConf(conf, submitJobFile); //line 245, generates job.xml
2.9 Actually submit the job
status = submitClient.submitJob(
				 jobId, submitJobDir.toString(), job.getCredentials()); //line 251

//LocalJobRunner, line 788
Job job = new Job(JobID.downgrade(jobid), jobSubmitDir); // construct the Job that actually runs; the earlier Job was the user-facing view
2.10 Once the job has finished, delete the contents of the job's temporary working directory
jtFs.delete(submitJobDir, true); //line 262

How MapTask works

1. Continuing from step 2.9 of the job submission flow, we move to the run() method of LocalJobRunner's inner class Job
//line 537, read job.splitmetainfo
try {
        TaskSplitMetaInfo[] taskSplitMetaInfos = 
          SplitMetaInfoReader.readSplitMetaInfo(jobId, localFs, conf, systemJobDir);
  //get the number of reduce tasks 
  int numReduceTasks = job.getNumReduceTasks();
2. Based on the number of splits, create one MapTaskRunnable (thread) per MapTask
//line 547
List<RunnableWithThrowable> mapRunnables = getMapTaskRunnables(
            taskSplitMetaInfos, jobId, mapOutputFiles);
3. Create a thread pool

If there are multiple splits, multiple MapTasks are needed, and therefore multiple threads to run them.

ExecutorService mapService = createMapExecutor(); 
4. Run the MapTaskRunnables
runTasks(mapRunnables, mapService, "map");
5. Since the Runnables are submitted to the thread pool, MapTaskRunnable's run method is executed next.

6. Execute the run() method of MapTaskRunnable, an inner class of LocalJobRunner's inner class Job.

//LocalJobRunner, line 254
MapTask map = new MapTask(systemJobFile.toString(), mapId, taskId,
            info.getSplitIndex(), 1); // create the MapTask object

7. Execute the run method of MapTask
//line 271
map.run(localConf, Job.this); 

//run() then calls runNewMapper, line 347
runNewMapper(job, splitMetaInfo, umbilical, reporter); 

//Step into runNewMapper
//create a TaskAttemptContext, MapTask line 752
org.apache.hadoop.mapreduce.TaskAttemptContext taskContext =
      new org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl(job, getTaskID(), reporter);

//get the Mapper, MapTask line 757
org.apache.hadoop.mapreduce.Mapper<INKEY,INVALUE,OUTKEY,OUTVALUE> mapper =
      (org.apache.hadoop.mapreduce.Mapper<INKEY,INVALUE,OUTKEY,OUTVALUE>)
        ReflectionUtils.newInstance(taskContext.getMapperClass(), job);

//create the InputFormat, line 761
org.apache.hadoop.mapreduce.InputFormat<INKEY,INVALUE> inputFormat =
      (org.apache.hadoop.mapreduce.InputFormat<INKEY,INVALUE>)
        ReflectionUtils.newInstance(taskContext.getInputFormatClass(), job);

//rebuild the split object, line 765
org.apache.hadoop.mapreduce.InputSplit split = null;
    split = getSplitDetails(new Path(splitIndex.getSplitLocation()),
        splitIndex.getStartOffset());
    LOG.info("Processing split: " + split);

//get the RecordReader, line 770
org.apache.hadoop.mapreduce.RecordReader<INKEY,INVALUE> input =
      new NewTrackingRecordReader<INKEY,INVALUE>
        (split, inputFormat, reporter, taskContext);

//construct the output collector (ring buffer), line 778
if (job.getNumReduceTasks() == 0) {
      output =  // if NumReduceTasks == 0, write directly to the output file and skip the shuffle
        new NewDirectOutputCollector(taskContext, job, umbilical, reporter);
    } else {  // otherwise, build the in-memory buffer
      output = new NewOutputCollector(taskContext, job, umbilical, reporter);
    }
//Step into NewOutputCollector, line 710
//Step into collector = createSortingCollector(job, reporter);

collector.init(context); // initialize the buffer, line 408; step into init()

final float spillper = //line 908
        job.getFloat(JobContext.MAP_SORT_SPILL_PERCENT, (float)0.8); // spill threshold, 0.8
final int sortmb = job.getInt(MRJobConfig.IO_SORT_MB,
          MRJobConfig.DEFAULT_IO_SORT_MB); // buffer size, 100 MB

sorter = ReflectionUtils.newInstance(job.getClass( //line 994
                   MRJobConfig.MAP_SORT_CLASS, QuickSort.class, // QuickSort by default
                   IndexedSorter.class), job); // sorts via the index metadata without moving the records themselves

kvbuffer = new byte[maxMemUsage]; // kvbuffer created, line 1000
bufvoid = kvbuffer.length;
kvmeta = ByteBuffer.wrap(kvbuffer).order(ByteOrder.nativeOrder()).asIntBuffer(); // kvmeta

// k/v serialization, line 1017
// output counters, line 1027
// compression, line 1034
// combiner, line 1043

------ ring buffer construction complete ------
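
The spill percentage and buffer size read above are ordinary job configuration values. A small hedged sketch of overriding them (the property names are the standard Hadoop 3 keys behind JobContext.MAP_SORT_SPILL_PERCENT and MRJobConfig.IO_SORT_MB; the values are illustrative):

import org.apache.hadoop.conf.Configuration;

public class SortBufferConfig {
  public static void apply(Configuration conf) {
    conf.setInt("mapreduce.task.io.sort.mb", 200);            // ring buffer size in MB, default 100
    conf.setFloat("mapreduce.map.sort.spill.percent", 0.80f); // spill threshold, default 0.80
  }
}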
    
//After the buffer is built, obtain the partitioner based on NumReduceTasks
    partitions = jobContext.getNumReduceTasks(); //line 711
      if (partitions > 1) {
        partitioner = (org.apache.hadoop.mapreduce.Partitioner<K,V>)
          ReflectionUtils.newInstance(jobContext.getPartitionerClass(), job);
      } else {
        partitioner = new org.apache.hadoop.mapreduce.Partitioner<K,V>() {
          @Override
          public int getPartition(K key, V value, int numPartitions) {
            return partitions - 1;
          }
        };
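
For comparison with the default behaviour above, here is a hedged sketch of a user-defined Partitioner; the class name and routing rule are made up for illustration, and it is only consulted when the number of reduce tasks is greater than 1.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class FirstLetterPartitioner extends Partitioner<Text, IntWritable> {
  @Override
  public int getPartition(Text key, IntWritable value, int numPartitions) {
    // words starting with a-m go to one partition, everything else to another
    String word = key.toString();
    char c = word.isEmpty() ? 'a' : Character.toLowerCase(word.charAt(0));
    int bucket = (c >= 'a' && c <= 'm') ? 0 : 1;
    return bucket % numPartitions; // stay in range even if fewer reducers are configured
  }
}

In the driver this would be paired with job.setPartitionerClass(FirstLetterPartitioner.class) and job.setNumReduceTasks(2), since partitions is taken from getNumReduceTasks() as shown above.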

// Invoke the run method of WordCountMapper, line 799. What actually executes is the run() of the Mapper base class that WordCountMapper extends.
mapper.run(mapperContext); 


//Step into the run method of Mapper
public void run(Context context) throws IOException, InterruptedException {
    setup(context);
    try {
      while (context.nextKeyValue()) {
        map(context.getCurrentKey(), context.getCurrentValue(), context); // call the user's map method
      }
    } finally {
      cleanup(context);
    }
  }
//Inside map(), the user keeps calling context.write(outK, outV); every KV pair written out is collected by the buffer
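
For reference, a hedged sketch of what the WordCountMapper mentioned in this article might look like (its exact implementation is not shown in the excerpts, so the tokenization here is just an example). Every context.write() below is what lands in the collector during the shuffle traced next.

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
  private final Text outK = new Text();
  private static final IntWritable ONE = new IntWritable(1);

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // split each input line into words and emit (word, 1)
    for (String word : value.toString().split("\\s+")) {
      if (!word.isEmpty()) {
        outK.set(word);
        context.write(outK, ONE); // each KV pair is collected into the ring buffer
      }
    }
  }
}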
 

Shuffle flow

1. Step into context.write(outK, outV)
//In the WrappedMapper class, which wraps the user's Mapper context
mapContext.write(key, value); //line 112

//then into
output.write(key, value); //line 89

//then into MapTask: the KV pair written by map() has its partition computed and is collected into the buffer
collector.collect(key, value, partitioner.getPartition(key, value, partitions)); //line 727

//Step into collect(); once the spill condition is met, a spill is triggered
startSpill(); //MapTask, line 1126

spillReady.signal(); // inter-thread signal telling the spill thread to start spilling, line 1602 

//The spill thread calls sortAndSpill() to perform the spill
private void sortAndSpill() throws IOException, ClassNotFoundException, InterruptedException{//1609
      //approximate the length of the output file to be the length of the
      //buffer + header lengths for the partitions
      final long size = distanceTo(bufstart, bufend, bufvoid) +
                  partitions * APPROX_HEADER_LENGTH;
      FSDataOutputStream out = null;
      FSDataOutputStream partitionOut = null;
      try {
        // create spill file
        final SpillRecord spillRec = new SpillRecord(partitions); // create the spill record, one index entry per partition
        final Path filename =
            mapOutputFile.getSpillFileForWrite(numSpills, size);
        out = rfs.create(filename);
      }
}
 
//Sort before spilling, line 1625
sorter.sort(MapOutputBuffer.this, mstart, mend, reporter);

// Perform the spill, line 1629
for (int i = 0; i < partitions; ++i) { // spill partition by partition
          IFile.Writer<K, V> writer = null;
          try {
            long segmentStart = out.getPos();
            partitionOut = CryptoUtils.wrapIfNecessary(job, out, false);
            writer = new Writer<K, V>(job, partitionOut, keyClass, valClass, codec,
                                      spilledRecordsCounter);
            if (combinerRunner == null) { // no combiner configured, so spill the records directly
              // spill directly
              DataInputBuffer key = new DataInputBuffer();
              while (spindex < mend &&
                  kvmeta.get(offsetFor(spindex % maxRec) + PARTITION) == i) {
                final int kvoff = offsetFor(spindex % maxRec);
                int keystart = kvmeta.get(kvoff + KEYSTART);
                int valstart = kvmeta.get(kvoff + VALSTART);
                key.reset(kvbuffer, keystart, valstart - keystart);
                getVBytesForOffset(kvoff, value);
                writer.append(key, value);
                ++spindex;
              }
            } else {
              int spstart = spindex;
              while (spindex < mend &&
                  kvmeta.get(offsetFor(spindex % maxRec)
                            + PARTITION) == i) {
                ++spindex;
              } 
            }
          }
}

//Spill finished, close the writer
writer.close(); //line 1667

//map() keeps writing into the buffer, and every time the spill condition is met another spill happens ........ the whole run may spill N times.

//If, after the last spill, the data remaining in the buffer never reaches the spill threshold, then once map() has processed all of its input, closing the output flushes the buffer to disk (i.e., data that never hit the spill condition is still written out).
output.close(mapperContext); //line 805

//Step into close(): flush the data, line 735
collector.flush();

//Inside flush(), the remaining data is written out through the same spill routine, line 1505
sortAndSpill();

//After the spills complete, merge the spill files, line 1527
mergeParts();

for(int i = 0; i < numSpills; i++) {
        filename[i] = mapOutputFile.getSpillFile(i);
        finalOutFileSize += rfs.getFileStatus(filename[i]).getLen();
      } // the number of spills tells us how many spill files must be merged, line 1852

//Create the two final output files (empty for now)
Path finalOutputFile =
          mapOutputFile.getOutputFileForWrite(finalOutFileSize); // data
Path finalIndexFile =
          mapOutputFile.getOutputIndexFileForWrite(finalIndexFileSize); // index


//Merge partition by partition, line 1925
for (int parts = 0; parts < partitions; parts++) {
          //create the segments to be merged
          List<Segment<K,V>> segmentList =
            new ArrayList<Segment<K, V>>(numSpills);
}
//Merge
RawKeyValueIterator kvIter = Merger.merge(job, rfs, //line 1950
                         keyClass, valClass, codec,
                         segmentList, mergeFactor,
                         new Path(mapId.toString()),
                         job.getOutputKeyComparator(), reporter, sortSegments,
                         null, spilledRecordsCounter, sortPhase.phase(),
                         TaskType.MAP);
//Write the merged output to disk, line 1959
long segmentStart = finalOut.getPos();
finalPartitionOut = CryptoUtils.wrapIfNecessary(job, finalOut, false);
Writer<K, V> writer = // create the output writer
    new Writer<K, V>(job, finalPartitionOut, keyClass, valClass, codec, spilledRecordsCounter);
if (combinerRunner == null || numSpills < minSpillsForCombine) { // if no combiner is set, or the number of spills is
    Merger.writeFile(kvIter, writer, reporter, job);  // below minSpillsForCombine (default 3), skip the combine step
} else {
    combineCollector.setWriter(writer);
    combinerRunner.combine(kvIter, combineCollector);
}
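
Driver-side, the combiner branch above is only reached if one has been registered. A hedged sketch (reusing the WordCountReducer as combiner, which is safe here only because summation is commutative and associative):

import org.apache.hadoop.mapreduce.Job;

public class CombinerSetup {
  public static void apply(Job job) {
    job.setCombinerClass(WordCountReducer.class); // run the reducer logic on map-side spills as well
    // minSpillsForCombine comes from this property (MRJobConfig.MAP_COMBINE_MIN_SPILLS), default 3
    job.getConfiguration().setInt("mapreduce.map.combine.minspills", 3);
  }
}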
//Merge complete, close the stream, line 1972
writer.close();

//Delete the spill files, line 1991
for(int i = 0; i < numSpills; i++) {
    rfs.delete(filename[i],true);
}

//What remains are file.out and file.out.index, waiting to be fetched by the reduce side

How ReduceTask works

1. In the run() method of LocalJobRunner's inner class Job

try {
   if (numReduceTasks > 0) { // based on the configured number of reduce tasks, create that many
                             // LocalJobRunner$Job$ReduceTaskRunnable instances
         List<RunnableWithThrowable> reduceRunnables = getReduceTaskRunnables(
                jobId, mapOutputFiles);
         ExecutorService reduceService = createReduceExecutor();
         runTasks(reduceRunnables, reduceService, "reduce");
          }
}   
//Execute the run method of LocalJobRunner$Job$ReduceTaskRunnable
//Create the ReduceTask object, line 334
ReduceTask reduce = new ReduceTask(systemJobFile.toString(),reduceId, taskId, mapIds.size(), 1);

// Execute ReduceTask's run method, line 347
reduce.run(localConf, Job.this);

//Step into the ReduceTask class
//In run(), the phases are set up first, line 324
if (isMapOrReduce()) {
      copyPhase = getProgress().addPhase("copy");
      sortPhase  = getProgress().addPhase("sort");
      reducePhase = getProgress().addPhase("reduce");
    }
//I'll skip the copy and sort phases here (getting tired...)

// then runNewReducer, line 390
if (useNewApi) {
      runNewReducer(job, umbilical, reporter, rIter, comparator, keyClass, valueClass);
    }

//Invoke the run method of WordCountReducer, line 628. What actually executes is the run() of the Reducer base class that WordCountReducer extends.
reducer.run(reducerContext); 

//Step into run(), line 171
reduce(context.getCurrentKey(), context.getValues(), context); // at last, the user's reduce method in WordCountReducer is invoked.

context.write(k,v) // the user's reduce() writes out the processed KV pairs.
    
reduceContext.write(key, value); //line 105
output.write(key, value); //line 89
real.write(key,value);  // the KV pair is written out through the RecordWriter, line 559
out.write(NEWLINE);  // the output stream writes the data into the result file, line 101
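
To close the loop, a hedged sketch of the WordCountReducer whose reduce() is reached here (the actual implementation is not shown in the excerpts, so this is just the canonical form):

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
  private final IntWritable outV = new IntWritable();

  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable v : values) {
      sum += v.get();            // add up the 1s emitted for this word
    }
    outV.set(sum);
    context.write(key, outV);    // handed to the RecordWriter and written with a trailing newline
  }
}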

//Finally done. What geniuses the people who wrote this framework must be!
