MapReduce的自定义InputFormat

源码如下:
InputFormat类

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import java.io.IOException;

/**
 * InputFormat that reads each input file as a single record:
 * key = NullWritable, value = the file's entire contents as BytesWritable.
 * Typical use: packaging many small files into one SequenceFile.
 */
public class MyInputFormat extends FileInputFormat<NullWritable, BytesWritable> {

    /**
     * A whole-file reader must never split a file: MyRR reads
     * split.getLength() bytes from offset 0, so a mid-file split
     * would produce corrupt records. Always one split per file.
     */
    @Override
    protected boolean isSplitable(JobContext context, Path filename) {
        return false;
    }

    /**
     * Creates the reader that loads the entire split (one file) into memory.
     * The framework also calls initialize() itself; the explicit call here
     * is harmless but kept for compatibility with the original behavior.
     *
     * @param inputSplit the file split (always a whole file, see isSplitable)
     * @param context    task context supplying the job configuration
     * @return a record reader producing exactly one (key, value) pair
     */
    @Override
    public RecordReader<NullWritable, BytesWritable> createRecordReader(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
        MyRR myRR = new MyRR();
        myRR.initialize(inputSplit, context);
        return myRR;
    }
}

RecordReader类

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

/**
 * RecordReader that emits exactly one record per split:
 * key = NullWritable, value = the whole file's bytes as BytesWritable.
 * Assumes the split covers an entire file (the paired InputFormat must
 * disable splitting) and that the file fits in memory (length cast to int).
 */
public class MyRR extends RecordReader<NullWritable, BytesWritable> {
    private FileSplit split;
    private Configuration conf;
    private final BytesWritable bytesWritable = new BytesWritable();
    // Becomes true once the single record has been produced.
    private boolean processed = false;

    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        split = (FileSplit) inputSplit;
        // Use the job's configuration, not a fresh Configuration():
        // a new one only sees local defaults and drops cluster settings
        // such as fs.defaultFS, breaking reads on a real cluster.
        conf = taskAttemptContext.getConfiguration();
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        if (processed) {
            return false;
        }
        Path path = split.getPath();
        FileSystem fileSystem = path.getFileSystem(conf);
        byte[] bytes = new byte[(int) split.getLength()];
        FSDataInputStream in = fileSystem.open(path);
        try {
            IOUtils.readFully(in, bytes, 0, bytes.length);
        } finally {
            // Close the stream even if the read fails.
            in.close();
        }
        // Do NOT close fileSystem here: FileSystem.get/getFileSystem return
        // a JVM-wide cached instance; closing it breaks every other task
        // in this JVM that is using the same FileSystem.
        bytesWritable.set(bytes, 0, bytes.length);
        processed = true;
        return true;
    }

    /** Key is always the NullWritable singleton — file name is added by the mapper. */
    @Override
    public NullWritable getCurrentKey() throws IOException, InterruptedException {
        return NullWritable.get();
    }

    /** The whole file's contents; valid after nextKeyValue() returns true. */
    @Override
    public BytesWritable getCurrentValue() throws IOException, InterruptedException {
        return bytesWritable;
    }

    /** 0 before the single record is read, 1 after. */
    @Override
    public float getProgress() throws IOException, InterruptedException {
        return processed ? 1.0f : 0.0f;
    }

    @Override
    public void close() throws IOException {
        // Nothing held open between records; the stream is closed in nextKeyValue().
    }
}

map类

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

/**
 * Mapper that re-keys each whole-file record by its source file name,
 * so the output SequenceFile maps file name -> file bytes.
 */
public class MyMap extends Mapper<NullWritable, BytesWritable, Text, BytesWritable> {
    // Reused across calls — Hadoop serializes the value at write time,
    // so a single mutable Text avoids one allocation per record.
    private final Text fileNameKey = new Text();

    @Override
    protected void map(NullWritable key, BytesWritable value, Context context) throws IOException, InterruptedException {
        FileSplit fileSplit = (FileSplit) context.getInputSplit();
        fileNameKey.set(fileSplit.getPath().getName());
        context.write(fileNameKey, value);
    }
}

驱动类

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
/**
 * Job driver: reads every file under the input path via MyInputFormat
 * and writes a SequenceFile of (file name, file bytes) pairs.
 *
 * Usage: MyDriver &lt;input path&gt; &lt;output path&gt;
 */
public class MyDriver {
    public static void main(String[] args) throws Exception {
        // Fail fast with a usage message instead of an opaque
        // ArrayIndexOutOfBoundsException when paths are missing.
        if (args.length < 2) {
            System.err.println("Usage: MyDriver <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "MyInputFormat");
        job.setJarByClass(MyDriver.class);

        job.setMapperClass(MyMap.class);
        // No reducer class set: the default (identity) Reducer runs with a
        // single reduce task, which also sorts the output by file name.

        job.setInputFormatClass(MyInputFormat.class);
        MyInputFormat.addInputPath(job, new Path(args[0]));

        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(BytesWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值