Hadoop 的 InputFormat 简单 demo（InputFormat example）- CSDN博客

本文链接：https://blog.csdn.net/qq_20641565/article/details/52770522

1.序列化对象

package com.lijie.inutformat;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * Hadoop-serializable holder for five {@code float} scores, named {@code a}
 * through {@code e}.
 *
 * <p>The wire format is the five floats written in the order a, b, c, d, e;
 * {@link #readFields(DataInput)} reads them back in the same order.
 *
 * <p>Equality, hashing and ordering are all defined over the five fields, and
 * {@link #compareTo(ScorePair)} returns 0 exactly when {@link #equals(Object)}
 * returns {@code true}.
 */
public class ScorePair implements WritableComparable<ScorePair>{

    private float a;
    private float b;
    private float c;
    private float d;
    private float e;


    public float getA() {
        return a;
    }

    public void setA(float a) {
        this.a = a;
    }

    public float getB() {
        return b;
    }

    public void setB(float b) {
        this.b = b;
    }

    public float getC() {
        return c;
    }

    public void setC(float c) {
        this.c = c;
    }

    public float getD() {
        return d;
    }

    public void setD(float d) {
        this.d = d;
    }

    public float getE() {
        return e;
    }

    public void setE(float e) {
        this.e = e;
    }

    /** Creates a pair with all five scores left at {@code 0.0f}. */
    public ScorePair() {
        super();
    }

    /** Creates a pair holding the five given scores. */
    public ScorePair(float a, float b, float c, float d, float e) {
        super();
        set(a, b, c, d, e);
    }

    /** Overwrites all five scores at once, so one instance can be reused per record. */
    public void set(float a, float b, float c, float d, float e) {
        this.a = a;
        this.b = b;
        this.c = c;
        this.d = d;
        this.e = e;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + Float.floatToIntBits(a);
        result = prime * result + Float.floatToIntBits(b);
        result = prime * result + Float.floatToIntBits(c);
        result = prime * result + Float.floatToIntBits(d);
        result = prime * result + Float.floatToIntBits(e);
        return result;
    }

    /**
     * Two pairs are equal when all five scores have identical
     * {@link Float#floatToIntBits(float)} representations.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        ScorePair other = (ScorePair) obj;
        if (Float.floatToIntBits(a) != Float.floatToIntBits(other.a))
            return false;
        if (Float.floatToIntBits(b) != Float.floatToIntBits(other.b))
            return false;
        if (Float.floatToIntBits(c) != Float.floatToIntBits(other.c))
            return false;
        if (Float.floatToIntBits(d) != Float.floatToIntBits(other.d))
            return false;
        if (Float.floatToIntBits(e) != Float.floatToIntBits(other.e))
            return false;
        return true;
    }

    /**
     * Restores the five scores from {@code in}, in the order written by
     * {@link #write(DataOutput)}.
     *
     * @throws IOException if reading from {@code in} fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        a=in.readFloat();
        b=in.readFloat();
        c=in.readFloat();
        d=in.readFloat();
        e=in.readFloat();
    }

    /**
     * Writes the five scores to {@code out} in the order a, b, c, d, e.
     *
     * @throws IOException if writing to {@code out} fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeFloat(a);
        out.writeFloat(b);
        out.writeFloat(c);
        out.writeFloat(d);
        out.writeFloat(e);
    }

    /**
     * Orders pairs lexicographically by a, then b, c, d and e.
     *
     * <p>{@link Float#compare(float, float)} is used for each field because it
     * yields 0 exactly when the two values have the same
     * {@code floatToIntBits} representation, which keeps this ordering
     * consistent with {@link #equals(Object)}.
     *
     * @param o the pair to compare against
     * @return a negative, zero or positive value as this pair sorts before,
     *         equal to, or after {@code o}
     */
    @Override
    public int compareTo(ScorePair o) {
        int cmp = Float.compare(a, o.a);
        if (cmp != 0) {
            return cmp;
        }
        cmp = Float.compare(b, o.b);
        if (cmp != 0) {
            return cmp;
        }
        cmp = Float.compare(c, o.c);
        if (cmp != 0) {
            return cmp;
        }
        cmp = Float.compare(d, o.d);
        if (cmp != 0) {
            return cmp;
        }
        return Float.compare(e, o.e);
    }

}

2.InputFormat类

package com.lijie.inutformat;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.util.LineReader;


/**
 * File input format that produces {@code (Text, ScorePair)} records by
 * handing every split to a {@link ScoreRecordReader}.
 */
public class ScoreInputFormat extends FileInputFormat<Text, ScorePair> {

    /**
     * Reports every input file as non-splittable.
     *
     * @return always {@code false}
     */
    @Override
    protected boolean isSplitable(JobContext context, Path filename) {
        return false;
    }

    /**
     * Supplies a fresh reader for the split. Neither argument is inspected
     * here; the returned reader receives them later through its own
     * {@code initialize} call.
     *
     * @return a new {@link ScoreRecordReader}
     */
    @Override
    public RecordReader<Text, ScorePair> createRecordReader(InputSplit arg0, TaskAttemptContext arg1)
            throws IOException, InterruptedException {
        RecordReader<Text, ScorePair> reader = new ScoreRecordReader();
        return reader;
    }

}

/**
 * Record reader that turns each whitespace-separated text line of a file into
 * a {@code (Text, ScorePair)} record.
 *
 * <p>A data line must contain exactly seven fields. The first two are joined
 * with a tab to form the key; the remaining five are parsed as floats and
 * stored in the value. The same key and value instances are reused for every
 * record.
 */
class ScoreRecordReader extends RecordReader<Text, ScorePair>{

    /** Field separator, compiled once instead of once per line. */
    private static final java.util.regex.Pattern WHITESPACE =
            java.util.regex.Pattern.compile("\\s+");

    /** Number of fields a data line must contain: two key parts plus five scores. */
    private static final int EXPECTED_FIELDS = 7;

    private LineReader in;
    private Text lineKey;
    private ScorePair lineValue;
    private Text line;

    /** Bytes consumed from the file so far, as reported by {@code LineReader.readLine}. */
    private long bytesRead;
    /** Length of the split in bytes, used as the denominator for progress. */
    private long totalBytes;

    /** Closes the underlying line reader if one was opened. */
    @Override
    public void close() throws IOException {
        if(in !=null){
            in.close();
        }
    }

    @Override
    public Text getCurrentKey() throws IOException, InterruptedException {
        return lineKey;
    }

    @Override
    public ScorePair getCurrentValue() throws IOException, InterruptedException {
        return lineValue;
    }

    /**
     * Reports the fraction of the split consumed so far.
     *
     * @return a value in {@code [0, 1]}; {@code 0} when the split length is
     *         not positive
     */
    @Override
    public float getProgress() throws IOException, InterruptedException {
        if (totalBytes <= 0) {
            return 0.0f;
        }
        return Math.min(1.0f, bytesRead / (float) totalBytes);
    }

    /**
     * Opens the file behind the split and prepares the reusable key, value
     * and line buffers.
     *
     * <p>The file is read from its beginning; the split's start offset is not
     * consulted. NOTE(review): this matches an input format that never splits
     * files — confirm no splittable format reuses this reader.
     *
     * @param arg0 the split to read; must be a {@code FileSplit}
     * @param arg1 task context supplying the configuration
     * @throws IOException if the file cannot be opened
     */
    @Override
    public void initialize(InputSplit arg0, TaskAttemptContext arg1)    throws IOException,
                                                                        InterruptedException {
        FileSplit split = (FileSplit)arg0;
        Configuration conf = arg1.getConfiguration();
        Path path = split.getPath();
        FileSystem fs = path.getFileSystem(conf);

        FSDataInputStream fileIn = fs.open(path);
        boolean opened = false;
        try {
            in = new LineReader(fileIn,conf);
            opened = true;
        } finally {
            // Do not leak the stream when the line reader cannot be built.
            if (!opened) {
                fileIn.close();
            }
        }
        totalBytes = split.getLength();
        bytesRead = 0L;
        line = new Text();
        lineKey = new Text();
        lineValue = new ScorePair();
    }

    /**
     * Advances to the next data line and loads it into the current key and
     * value. Lines that are empty or contain only whitespace are skipped, and
     * leading/trailing whitespace on a data line is ignored.
     *
     * @return {@code true} if a record was loaded, {@code false} at end of input
     * @throws IOException if a data line does not have exactly seven fields or
     *         one of its five score fields is not a valid float
     */
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {

        while (true) {
            int lineSize = in.readLine(line);
            // readLine reports the bytes consumed (terminator included); 0 means end of input.
            if(lineSize == 0) return false;
            bytesRead += lineSize;

            // Trim first: leading whitespace would otherwise yield an empty first token.
            String text = line.toString().trim();
            if (text.isEmpty()) {
                continue;
            }

            String[] split = WHITESPACE.split(text);
            if(split.length != EXPECTED_FIELDS){
                throw new IOException("数据错误!");
            }
            float a,b,c,d,e;
            try {
                a = Float.parseFloat(split[2]);
                b = Float.parseFloat(split[3]);
                c = Float.parseFloat(split[4]);
                d = Float.parseFloat(split[5]);
                e = Float.parseFloat(split[6]);
            } catch (NumberFormatException ex) {
                // Surface malformed numbers as the same checked data error, keeping the cause.
                throw new IOException("数据错误!", ex);
            }
            lineKey.set(split[0]+"\t"+split[1]);
            lineValue.set(a, b, c, d, e);
            return true;
        }
    }

}

3.MapReduce程序

package com.lijie.inutformat;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Driver for a MapReduce job that reads score records through
 * {@link ScoreInputFormat} and writes, per key, the sum and average of the
 * five scores.
 */
public class ScoreMapReduce extends Configured implements Tool {

    /** Input path used when no paths are given on the command line. */
    private static final String DEFAULT_INPUT = "hdfs://lijie:9000/score/*";
    /** Output path used when no paths are given on the command line. */
    private static final String DEFAULT_OUTPUT = "hdfs://lijie:9000/score/out";

    /**
     * Runs the job through {@link ToolRunner} and exits with its status.
     *
     * @param args optional generic Hadoop options followed by
     *             {@code <input> <output>}; the built-in default paths are
     *             used when the two paths are not supplied
     */
    public static void main(String[] args) throws Exception {
        int run = ToolRunner.run(new Configuration(), new ScoreMapReduce(), args);
        System.exit(run);
    }

    /** Identity mapper: forwards every record unchanged. */
    public static class ScoreMap extends Mapper<Text, ScorePair, Text, ScorePair> {
        @Override
        protected void map(Text key, ScorePair value, Context context)  throws IOException,
                                                                        InterruptedException {
            context.write(key, value);
        }
    }

    /**
     * Emits {@code "sum:<sum>\tavg:<avg>"} for each key, computed from the
     * five scores of the first value received for that key.
     */
    public static class ScoreReduce extends Reducer<Text, ScorePair, Text, Text> {
        @Override
        protected void reduce(  Text key, Iterable<ScorePair> values,
                                Context context) throws IOException, InterruptedException {
            // NOTE(review): only the first value is used; any further values for the
            // same key are ignored — confirm keys are unique in the input.
            ScorePair value = values.iterator().next();
            //sum
            float sum = value.getA()+value.getB()+value.getC()+value.getD()+value.getE();
            //avg
            float avg = sum/5;
            context.write(key, new Text("sum:"+sum+"\t"+"avg:"+avg));
        }
    }

    /**
     * Configures and runs the job, removing any existing output path first.
     *
     * @param arg {@code arg[0]} is the input path and {@code arg[1]} the
     *            output path; when fewer than two entries are given the
     *            default paths are used
     * @return {@code 0} if the job succeeded, {@code 1} otherwise
     */
    @Override
    public int run(String[] arg) throws Exception {
        String input = DEFAULT_INPUT;
        String output = DEFAULT_OUTPUT;
        if (arg != null && arg.length >= 2) {
            input = arg[0];
            output = arg[1];
        }

        // Use the configuration ToolRunner populated (it carries -D and other generic
        // options); fall back to a fresh one only when run() is called directly.
        Configuration conf = getConf();
        if (conf == null) {
            conf = new Configuration();
        }

        Path path = new Path(output);
        FileSystem fs = path.getFileSystem(conf);
        // Clear a previous run's output so the job does not fail on an existing path.
        if(fs.exists(path)){
            fs.delete(path, true);
        }

        Job job = Job.getInstance(conf, "score");

        job.setJarByClass(ScoreMapReduce.class);

        job.setMapperClass(ScoreMap.class);
        job.setReducerClass(ScoreReduce.class);

        job.setInputFormatClass(ScoreInputFormat.class);

        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, path);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(ScorePair.class);

        // Propagate failure to the caller instead of always reporting success.
        return job.waitForCompletion(true) ? 0 : 1;
    }

}