Over the past few days I've been studying MapReduce, and I tried writing a MapReduce program that implements secondary sort.
Without further ado, let's start by defining a custom key type. Remember that the key type must implement the WritableComparable interface, which means providing write, readFields, and compareTo; we also override toString so the output is readable.
Here is the finished code:
package com.soft;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class InPair implements WritableComparable<InPair> {
    private IntWritable first;
    private IntWritable second;

    public void set(IntWritable first, IntWritable second) {
        this.first = first;
        this.second = second;
    }

    // The no-arg constructor is required: Hadoop instantiates the key via
    // reflection during deserialization, so omitting it causes an error.
    public InPair() {
        set(new IntWritable(), new IntWritable());
    }

    // Convenience constructor that wraps primitive ints in IntWritables.
    public InPair(int first, int second) {
        set(new IntWritable(first), new IntWritable(second));
    }

    public InPair(IntWritable first, IntWritable second) {
        set(first, second);
    }

    public void setFirst(IntWritable first) {
        this.first = first;
    }

    public void setSecond(IntWritable second) {
        this.second = second;
    }

    public IntWritable getFirst() {
        return first;
    }

    public IntWritable getSecond() {
        return second;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        first.write(out);
        second.write(out);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        first.readFields(in);
        second.readFields(in);
    }

    @Override
    public String toString() {
        return first + "\t" + second;
    }

    // Overriding equals is optional here (reduce-side grouping uses compareTo,
    // not equals); if you do override it, also override hashCode, which the
    // default HashPartitioner relies on.
    /*
    public boolean equals(Object o) {
        if (o instanceof InPair) {
            InPair tp = (InPair) o;
            return first.equals(tp.first) && second.equals(tp.second);
        }
        return false;
    }
    */

    // Override compareTo to implement the secondary sort:
    // compare on first; if equal, break the tie with second.
    @Override
    public int compareTo(InPair tp) {
        int cmp = first.compareTo(tp.first);
        if (cmp != 0) {
            return cmp;
        }
        return second.compareTo(tp.second);
    }
}
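Before wiring this into a job, it's easy to sanity-check the ordering that compareTo defines with a plain local sort, no cluster needed. Here is a minimal sketch with made-up sample values; the class name InPairSortDemo is just a hypothetical helper, not part of the job:

package com.soft;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class InPairSortDemo {
    public static void main(String[] args) {
        // A few hypothetical pairs, only to illustrate the ordering.
        List<InPair> pairs = new ArrayList<InPair>();
        pairs.add(new InPair(3, 7));
        pairs.add(new InPair(1, 9));
        pairs.add(new InPair(3, 1));
        // Collections.sort uses the compareTo defined above.
        Collections.sort(pairs);
        for (InPair p : pairs) {
            System.out.println(p); // toString prints first<TAB>second
        }
        // Resulting order: (1,9), (3,1), (3,7):
        // ascending by first, ties broken by second.
    }
}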
With the custom key type in place, next up are the Mapper, the Reducer, and the driver:
package com.soft;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class soft {
    // Note: the Mapper's input types must be LongWritable and Text (the byte
    // offset of each line and the line's content); getting these wrong causes
    // a type-mismatch error.
    public static class map1 extends Mapper<LongWritable, Text, InPair, NullWritable> {
        private IntWritable key1 = new IntWritable();
        private IntWritable key2 = new IntWritable();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] list = line.split("\t");
            key1.set(Integer.parseInt(list[0]));
            key2.set(Integer.parseInt(list[1]));
            // Wrap both fields into the composite key; the value is unused.
            context.write(new InPair(key1, key2), NullWritable.get());
        }
    }

    public static class reduce1 extends Reducer<InPair, NullWritable, InPair, NullWritable> {
        @Override
        public void reduce(InPair key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }
    // I think the reduce method could be omitted here, since it merely writes
    // each key back out; see the sketch after this listing.

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(soft.class);
        job.setMapperClass(map1.class);
        job.setReducerClass(reduce1.class);
        job.setMapOutputKeyClass(InPair.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(InPair.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.setInputPaths(job, new Path("E:\\data1\\ceshi.txt"));
        FileOutputFormat.setOutputPath(job, new Path("E:\\data1\\out"));
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}
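As noted above, the pass-through reduce1 isn't strictly necessary: Hadoop's base Reducer class is an identity reducer that emits every key/value pair unchanged, and it is also the default when no reducer class is set. Below is a minimal sketch of an alternative driver, assuming the same InPair and map1 classes; the class name SoftNoReducer and the args-based paths are my own placeholders:

package com.soft;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SoftNoReducer {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(SoftNoReducer.class);
        job.setMapperClass(soft.map1.class);
        // The base Reducer writes each (key, value) through unchanged,
        // which is what reduce1 did by hand.
        job.setReducerClass(Reducer.class);
        job.setMapOutputKeyClass(InPair.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(InPair.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

One subtle difference to keep in mind: reduce1 writes each distinct key once, so duplicate input pairs collapse into a single output line, while the identity Reducer emits one line per occurrence. If the input has no duplicate pairs, the two versions produce the same sorted output.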