Hadoop中自定义排序、分区、分组

–Reducer

import java.io.IOException;    
import org.apache.hadoop.io.Text;  
import org.apache.hadoop.mapreduce.Reducer;  

public class HotReduce extends Reducer<KeyPair, Text, KeyPair, Text>{   
        @Override  
        protected void reduce(KeyPair kp, Iterable<Text> i,Context context)  
                        throws IOException, InterruptedException {  
                for (Text text : i) {  
                        context.write(kp, text);  
                }  
        }           
}  

–Mapper

import java.io.IOException;  
import java.text.ParseException;  
import java.text.SimpleDateFormat;  
import java.util.Calendar;  
import java.util.Date;    
import org.apache.hadoop.io.LongWritable;  
import org.apache.hadoop.io.Text;  
import org.apache.hadoop.mapreduce.Mapper;  

public class HotMapper extends Mapper<LongWritable, Text, KeyPair, Text>{  
        @Override  
        protected void map(LongWritable key, Text value,Context context)  
                        throws IOException, InterruptedException {  
                String line=value.toString();  
                String[] ss=line.split("\t");  
                if (ss.length==2){  
                        int year=Integer.parseInt(ss[0].substring(0, 4));  
                        int hot=Integer.parseInt(ss[1].substring(0, ss[1].indexOf("°C")));  
                        KeyPair kp=new KeyPair();  
                        kp.setYear(year);  
                        kp.setHot(hot);  
                        context.write(kp, value);  
                }  
        }  
}  

–KeyPair 自定义封装类

import java.io.DataInput;  
import java.io.DataOutput;  
import java.io.IOException;  

import org.apache.hadoop.io.WritableComparable;  


public class KeyPair implements WritableComparable<KeyPair> {  
        private int year;  
        private int hot;  
        public int getYear() {  
                return year;  
        }  
        public void setYear(int year) {  
                this.year = year;  
        }  
        public int getHot() {  
                return hot;  
        }  
        public void setHot(int hot) {  
                this.hot = hot;  
        }  
        @Override  反序列化
        public void readFields(DataInput in) throws IOException {  
                this.year=in.readInt();  
                this.hot=in.readInt();  

        }  
        @Override  序列化
        public void write(DataOutput out) throws IOException {  
                out.writeInt(year);  
                out.writeInt(hot);  

        }  
        @Override  对比
        public int compareTo(KeyPair o) {                   
                int result=Integer.compare(year, o.getYear());  
                if (result!=0){  
                        return result;  
                }  
                return Integer.compare(hot, o.hot);  
        }  

        @Override  
        public String toString() {  
                return year+"\t"+hot;  
        }  

        @Override  
        public int hashCode() {  
                return new Integer(year+hot).hashCode();  
        }  
} 

–HotPartition
//自定义分区

import org.apache.hadoop.io.Text;  
import org.apache.hadoop.mapreduce.Partitioner;  

public class HotPartition extends Partitioner<KeyPair, Text>{  

        @Override  
        public int getPartition(KeyPair key, Text value, int num) {  

                return (key.getYear()*127%num);  
        }  

}  

–HotSort 自定义排序

import org.apache.hadoop.io.WritableComparable;  
import org.apache.hadoop.io.WritableComparator;  

public class HotSort extends WritableComparator{  

        public HotSort() {  
                super(KeyPair.class, true);  //排序时使用自定义分装的类进行排序
        }  

        @Override  
        public int compare(WritableComparable a, WritableComparable b) {  
                KeyPair o1=(KeyPair) a;  
                KeyPair o2=(KeyPair) b;  
                int res=Integer.compare(o1.getYear(), o2.getYear());  
                if (res!=0){  
                        return res;  
                }  
                return -Integer.compare(o1.getHot(),o2.getHot());//降序排序  
        }  


}  

–HotGroup
自定义分组

import org.apache.hadoop.io.WritableComparable;  
import org.apache.hadoop.io.WritableComparator;  

public class HotGroup extends WritableComparator{  

        public HotGroup() {  
                super(KeyPair.class, true);  
        }  

        @Override  
        public int compare(WritableComparable a, WritableComparable b) {  
                KeyPair o1=(KeyPair) a;  
                KeyPair o2=(KeyPair) b;  

                return Integer.compare(o1.getYear(),o2.getYear());  
        }  

} 
public class Job{
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration configuration = new Configuration();
        Job job = new Job(configuration,"partation");
        job.setJarByClass(Job.class);
        job.setMapperClass(HotMapper.class);
        job.setReducerClass(HotReducer.class);
        job.setOutputKeyClass(KeyPair.class);
        job.setOutputValueClass(Text.class);

        job.setGroupingComparatorClass(FirstPartation.class);
        job.setNumReduceTasks(3);
        job.setSortComparatorClass(SortHot.class);
        job.setGroupingComparatorClass(GroupHot.class);

        FileInputFormat.addInputPath(job, new Path("/user/root/books"));
        FileOutputFormat.setOutputPath(job, new Path("/user/root/bookout"));
        System.exit(job.waitForCompletion(true)?0:1);
    }
}



  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值