CombineFileInputFormat

package cn.mrzhou.test;

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
import org.apache.hadoop.util.LineReader;
/**
 *
 * @author zhoulei
 * @version 1.0.0 2013-07-26
 */
public class MyCombineFileInputFormat extends CombineFileInputFormat<LongWritable, Text> {

 @Override
 public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException {
  //这个地方返回的是CombinerFileRecordReader  其实在CombineFileRecordReader 其实将 我们下面自定义的 CombineLineRecordReader 进行了一个封装

//并反射机制调用 实际的CombineLineRecordReader的 具体方法

//在 CombineFileRecordReader 构造方法中

//rrConstructor = rrClass.getDeclaredConstructor(constructorSignature);
// rrConstructor.setAccessible(true);

//initNextRecordReader();

//在initNextRecordReader用来访问 实际CombineLineRecordReader 的构造方法 如下:

//curReader =  rrConstructor.newInstance(new Object []  {split, context, Integer.valueOf(idx)});

//这里要注意的是 idx 是一个序号,为什么要这个序号 因为是Combine 所以每个split 可能有多个path 分别传进来

//这个idx 通过没调用一次initNextRecordReader idx++ 一次来递增

// 直道  if (idx == split.getNumPaths()) { //才结束
//      return false;
 //   }


  return new CombineFileRecordReader<LongWritable, Text>((CombineFileSplit)split, context, CombineLineRecordReader.class);
 }

}

class CombineLineRecordReader extends RecordReader<LongWritable, Text>{
 private long start;
 private long end;
 private long aLength;
 private Path path;
 private LineReader reader;
 private long pos;
 private LongWritable key;
 private Text value;
 private int maxLineLength;
 //所以这个构造方法里面 就会 有一个Integer i 的形参。
 public CombineLineRecordReader(InputSplit split,TaskAttemptContext context,Integer i) throws IOException{
  CombineFileSplit fileSplit = (CombineFileSplit)split;
  maxLineLength = context.getConfiguration().getInt("mapred.linerecordreader.maxlength",Integer.MAX_VALUE);
  start = fileSplit.getOffset(i);
  aLength = fileSplit.getLength(i);
  end = start+aLength;
  path = fileSplit.getPath(i);
  
  FileSystem fs = FileSystem.get(context.getConfiguration());
  
  FSDataInputStream in = fs.open(path);
  boolean skipFirstLine = false;
   
  if (start != 0) { 
   skipFirstLine = true; 
   --start; 
   in.seek(start); 
        } 
   reader = new LineReader(in); 
   if (skipFirstLine) // skip first line and re-establish "startOffset". 
         { 
    int readNum = reader.readLine(new Text(),0,(int) Math.min((long) Integer.MAX_VALUE, end - start)); 
    start += readNum; 
         } 
   this.pos = start;
 }
 @Override
 public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  
 }
 

 @Override
 public boolean nextKeyValue() throws IOException, InterruptedException {
   if (key == null) {
         key = new LongWritable();
       }
       key.set(pos); //偏移量作为key
       if (value == null) {
         value = new Text();
       }
       int newSize = 0;
       while (pos < end) {
         newSize = reader.readLine(value, maxLineLength,
                               Math.max((int)Math.min(Integer.MAX_VALUE, end-pos),
                                        maxLineLength));
         if (newSize == 0) {
           break;
         }
         pos += newSize;
         if (newSize < maxLineLength) {
           break;
         }
       }
       if (newSize == 0) {
         key = null;
         value = null;
           return false;
       } else {
         return true;
       }
 }

 @Override
 public LongWritable getCurrentKey() throws IOException, InterruptedException {
  // TODO Auto-generated method stub
  return key;
 }

 @Override
 public Text getCurrentValue() throws IOException, InterruptedException {
  // TODO Auto-generated method stub
  return value;
 }

 @Override
 public float getProgress() throws IOException, InterruptedException {
   if (start == end) {
        return 0.0f;
      } else {
        return Math.min(1.0f, (pos - start) / (float)(end - start));
      }
 }

 @Override
 public void close() throws IOException {
  reader.close();
 }
 
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值