package com.simple;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
public class IntPair implements WritableComparable<IntPair> {
private int first;
private int second;
// Hadoop needs a no-argument constructor so it can create IntPair instances during deserialization
public IntPair() {
super();
}
public IntPair(int first, int second) {
super();
this.first = first;
this.second = second;
}
public int getFirst() {
return first;
}
public void setFirst(int first) {
this.first = first;
}
public int getSecond() {
return second;
}
public void setSecond(int second) {
this.second = second;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + first;
result = prime * result + second;
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
IntPair other = (IntPair) obj;
if (first != other.first)
return false;
if (second != other.second)
return false;
return true;
}
@Override
public String toString() {
return "IntPair [first=" + first + ", second=" + second + "]";
}
@Override
public int compareTo(IntPair intPair) {
// Compare the first fields; when they differ, the first field decides the order
if (first != intPair.first) {
return first > intPair.first ? 1 : -1;
} else {
// When the first fields are equal, compare the second fields (returns 0 when both pairs are equal)
return Integer.compare(second, intPair.second);
}
}
@Override
// readFields reads the two fields back during deserialization, in the same order write() wrote them
public void readFields(DataInput in) throws IOException {
this.first=in.readInt();
this.second=in.readInt();
}
@Override
// write serializes the two fields during the shuffle
public void write(DataOutput out) throws IOException {
out.writeInt(first);
out.writeInt(second);
}
}
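The composite key can be sanity-checked outside the cluster. The sketch below (a hypothetical helper, not part of the job; the class name IntPairCheck and the sample numbers are assumptions) round-trips an IntPair through write()/readFields() and exercises compareTo:
package com.simple;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
public class IntPairCheck {
    public static void main(String[] args) throws IOException {
        // Serialize an IntPair into an in-memory buffer, the way Hadoop does during the shuffle
        IntPair original = new IntPair(3, 7);
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));
        // Deserialize into a fresh instance created through the no-argument constructor
        IntPair copy = new IntPair();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
        System.out.println(copy); // IntPair [first=3, second=7]
        // compareTo orders by the first field, then by the second
        System.out.println(new IntPair(1, 9).compareTo(new IntPair(2, 0)) < 0); // true
        System.out.println(new IntPair(1, 9).compareTo(new IntPair(1, 5)) > 0); // true
        System.out.println(new IntPair(1, 9).compareTo(new IntPair(1, 9)));     // 0
    }
}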
package com.simple;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
/*
* Partitioner class:
* partitions map output by the first field of the composite key.
*/
public class FirstPartitioner extends Partitioner<IntPair, Text> {
@Override
public int getPartition(IntPair key, Text value, int numPartitions) {
// Multiply the first field of the key by 127, take the absolute value, then mod by numPartitions,
// which should match the number of reduce tasks
return Math.abs(key.getFirst() * 127) % numPartitions;
}
}
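For illustration only (the class name PartitionCheck, the choice of two partitions, and the sample keys are assumptions), the partitioner sends every record with the same first field to the same reduce task:
package com.simple;
import org.apache.hadoop.io.Text;
public class PartitionCheck {
    public static void main(String[] args) {
        FirstPartitioner partitioner = new FirstPartitioner();
        int numPartitions = 2; // must match the configured number of reduce tasks
        // Keys sharing first = 20 land in the same partition; a different first field may land elsewhere
        System.out.println(partitioner.getPartition(new IntPair(20, 21), new Text("20 21"), numPartitions)); // 0
        System.out.println(partitioner.getPartition(new IntPair(20, 53), new Text("20 53"), numPartitions)); // 0
        System.out.println(partitioner.getPartition(new IntPair(31, 42), new Text("31 42"), numPartitions)); // 1
    }
}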
package com.simple;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/*
* Grouping comparator class: puts all records that share the same first field into one reduce() call
*/
public class GroupingComparator extends WritableComparator {
// A no-argument constructor is required; super(IntPair.class, true) registers the key class
// and lets the comparator create IntPair instances for comparison
protected GroupingComparator() {
super(IntPair.class, true);
}
// Override compare: group composite keys by the first (natural) key only
@SuppressWarnings("rawtypes")
@Override
public int compare(WritableComparable w1, WritableComparable w2) {
IntPair ip1 = (IntPair) w1;
IntPair ip2 = (IntPair) w2;
return Integer.compare(ip1.getFirst(), ip2.getFirst());
}
}
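The difference between the sort order and the grouping can be shown directly (GroupingCheck and the sample values are assumptions; calling the protected constructor works because the class sits in the same package): compareTo still separates (20, 21) and (20, 53), while the grouping comparator treats them as one key, so their values reach a single reduce() call.
package com.simple;
public class GroupingCheck {
    public static void main(String[] args) {
        IntPair a = new IntPair(20, 21);
        IntPair b = new IntPair(20, 53);
        // Sort comparison: a comes before b, so values arrive in ascending order of the second field
        System.out.println(a.compareTo(b) < 0); // true
        // Grouping comparison: the first fields match, so both records belong to the same reduce group
        System.out.println(new GroupingComparator().compare(a, b)); // 0
    }
}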
package com.simple;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class SecondarySortMapper extends Mapper<LongWritable, Text, IntPair, Text> {
private final IntPair keyPair = new IntPair();
String[] lineArr = null;
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
// Split the line on a single space, store the first field in keyPair.first and the second
// field in keyPair.second, then emit keyPair as the key and the original line as the value
String line = value.toString();
lineArr = line.split(" ", -1);
keyPair.setFirst(Integer.parseInt(lineArr[0]));
keyPair.setSecond(Integer.parseInt(lineArr[1]));
context.write(keyPair, value);
}
}
package com.simple;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class SecondarySortReducer extends Reducer<IntPair, Text, Text, Text> {
private static final Text SEPARATOR = new Text("---------------------");
@Override
public void reduce(IntPair key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
// Write a separator line before each IntPair group so the group boundaries are visible in the output
context.write(SEPARATOR, null);
// Iterate over the values of this group and write each original line
for (Text val : values) {
context.write(null, val);
}
}
}
package com.simple;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class SecondarySortJob {
public static void main(String[] args) throws Exception {
// Get the job object
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "hdfs://localhost:9000");
Job job = Job.getInstance(conf);
// Set the main (driver) class
job.setJarByClass(SecondarySortJob.class);
// Set the mapper, reducer and map output types
job.setMapperClass(SecondarySortMapper.class);
job.setReducerClass(SecondarySortReducer.class);
job.setMapOutputKeyClass(IntPair.class);
job.setMapOutputValueClass(Text.class);
// Set the custom partitioner
job.setPartitionerClass(FirstPartitioner.class);
// Set the grouping comparator
job.setGroupingComparatorClass(GroupingComparator.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
// Set the job input and output paths
FileInputFormat.setInputPaths(job, new Path("/SecondarySort.txt"));
FileOutputFormat.setOutputPath(job, new Path("/simple/output"));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
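To make the whole flow concrete, here is a small hypothetical input file and the output the job would produce with the default single reduce task (the numbers are illustrative, and the result follows from the logic above): records are grouped by the first number, the second number is sorted ascending inside each group, and a separator line precedes every group.
Sample /SecondarySort.txt (hypothetical):
20 5
40 20
20 4
40 10
30 1
40 30
Resulting part-r-00000:
---------------------
20 4
20 5
---------------------
30 1
---------------------
40 10
40 20
40 30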