MapReduce高效topN

案例：高效输出每个用户评分最高的 N 条电影记录（TopN）
1) 用户自定义 bean 作为 KEY，并指定排序规则
2) 用户自定义分区器（Partitioner），按照用户指定的属性将数据分发到不同的 reduce 端
3) 自定义分组比较器（GroupingComparator），按指定属性分组后迭代输出数据
在这里插入图片描述

自定义KEY的排序
/**

  • 用户自定义Bean作为KEY 指定排序规则 , 实现hdp的序列化
  • @author DOIT_HANG_GE
  • @version 2019年2月27日 WritableComparable 可自定义排序的序列化bean
    */
    public class MovieBean implements WritableComparable {
    private String movie ;
    private int rate ;
    private long timeStamp ;
    private int uid ;
    public void set(String movie, int rate, long timeStamp, int uid) {
    this.movie = movie;
    this.rate = rate;
    this.timeStamp = timeStamp;
    this.uid = uid;
    }
    public String getMovie() {
    return movie;
    }
    public void setMovie(String movie) {
    this.movie = movie;
    }
    public int getRate() {
    return rate;
    }
    public void setRate(int rate) {
    this.rate = rate;
    }
    public long getTimeStamp() {
    return timeStamp;
    }
    public void setTimeStamp(long timeStamp) {
    this.timeStamp = timeStamp;
    }
    public int getUid() {
    return uid;
    }
    public void setUid(int uid) {
    this.uid = uid;
    }
    @Override
    public String toString() {
    return “MovieBean [movie=” + movie + “, rate=” + rate + “, timeStamp=” + timeStamp + “, uid=” + uid + “]”;
    }
    @Override
    public void write(DataOutput out) throws IOException {
    out.writeUTF(movie);
    out.writeInt(rate);
    out.writeLong(timeStamp);
    out.writeInt(uid);
    }
    @Override
    public void readFields(DataInput in) throws IOException {
    this.movie = in.readUTF() ;
    this.rate = in.readInt() ;
    this.timeStamp = in.readLong() ;
    this.uid = in.readInt() ;
    }
    @Override
    public int compareTo(MovieBean o) {
    //用户uid相同 按照分数降序排列
    return Integer.compare(this.uid, o.getUid())==0?Integer.compare(o.getRate(), this.rate):Integer.compare(this.uid, o.getUid());
    }
    }

自定义分区
/**

  • 自定义数据分区的逻辑按照用户的uid进行分区<MovieBean, NullWritable> 泛型 map端输出的数据
  • @author DOIT_HANG_GE
  • @version 2019年2月27日
    /
    public class MyPartitioner extends Partitioner<MovieBean, NullWritable> {
    @Override
    public int getPartition(MovieBean key, NullWritable value, int numPartitions) {
    return (key.getUid() + “”.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
    }
自定义 reduce 端的分组迭代（GroupingComparator）
    /
    *
  • reduce端按照指定的key为组迭代
  • @author DOIT_HANG_GE
  • @version 2019年2月27日
    */
    public class MyGroupingComparator extends WritableComparator{
    public MyGroupingComparator(){
    // true 反序列化成WritableComparable对象
    super(MovieBean.class,true) ;
    }
    // reduce 会从map输出的文件中获取两个相邻的key 并反序列化成WritableComparable
    // 判断两个相邻的key是否是同一组数据 然后迭代
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
    MovieBean k1 = (MovieBean) a ;
    MovieBean k2 = (MovieBean) b ;
    return Integer.compare(k1.getUid(), k2.getUid());
    }
    }

MAPREDUCE

/**
 * MapReduce job that outputs, for every user, that user's top-N highest-rated
 * movie records.
 *
 * <p>The mapper parses each input line as JSON into a {@code MovieBean} key.
 * {@code MyPartitioner} routes keys by uid, and {@code MyGroupingComparator}
 * groups them by uid. Because {@code MovieBean} sorts by descending rate within
 * a uid, the reducer only has to emit the first N records of each group.
 *
 * <p>Usage: {@code TopDemo [inputPath [outputPath [topN]]]}. Every argument is
 * optional and falls back to the defaults defined below.
 */
public class TopDemo {

    /** Configuration key under which the per-user record limit is passed to the reducer. */
    private static final String TOP_N_KEY = "topdemo.topn";
    /** Number of records emitted per user when no limit is configured. */
    private static final int DEFAULT_TOP_N = 20;
    /** Input directory used when no path is given on the command line. */
    private static final String DEFAULT_INPUT = "d:/data/movie/input";
    /** Output directory used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT = "d:/data/movie/output3";
    /** Counter group for records the mapper skipped. */
    private static final String COUNTER_GROUP = "TopDemo";

    /** Parses one JSON line per call into a {@code MovieBean} and emits it as the key. */
    public static class TopMapper extends Mapper<LongWritable, Text, MovieBean, NullWritable> {

        private final Gson gs = new Gson();

        /**
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of input, expected to be a JSON object
         * @param context sink for the parsed bean
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            try {
                MovieBean movieBean = gs.fromJson(line, MovieBean.class);
                if (movieBean == null) {
                    // Gson returns null for an empty document (e.g. a blank line);
                    // a null key must not be written.
                    context.getCounter(COUNTER_GROUP, "EMPTY_RECORDS").increment(1);
                    return;
                }
                context.write(movieBean, NullWritable.get());
            } catch (JsonSyntaxException e) {
                // Malformed lines are still skipped (best effort), but counted in the
                // job counters instead of dumping one stack trace per bad record.
                context.getCounter(COUNTER_GROUP, "MALFORMED_RECORDS").increment(1);
            }
        }
    }

    /** Emits at most N records per uid group. */
    public static class TopReducer extends Reducer<MovieBean, NullWritable, MovieBean, NullWritable> {

        private int topN = DEFAULT_TOP_N;

        /** Reads the per-user limit from the job configuration. */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            topN = context.getConfiguration().getInt(TOP_N_KEY, DEFAULT_TOP_N);
        }

        /**
         * @param mb      key of the current group
         * @param iters   one entry per record in the group
         * @param context sink for the selected records
         */
        @Override
        protected void reduce(MovieBean mb, Iterable<NullWritable> iters, Context context)
                throws IOException, InterruptedException {
            if (topN <= 0) {
                return;
            }
            int emitted = 0;
            // Hadoop reuses the key object and refreshes its fields on every iteration
            // step, so writing mb inside the loop emits each record of the group in
            // sorted order rather than the first record repeatedly.
            for (NullWritable nullWritable : iters) {
                context.write(mb, nullWritable);
                emitted++;
                if (emitted >= topN) {
                    return;
                }
            }
        }
    }

    /**
     * Configures and runs the job, then exits with 0 on success and -1 on failure.
     *
     * @param args optional: {@code [0]} input path, {@code [1]} output path,
     *             {@code [2]} number of records per user
     * @throws Exception if job setup or execution fails, or {@code args[2]} is not an integer
     */
    public static void main(String[] args) throws Exception {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        if (args.length > 2) {
            // Job.getInstance copies the configuration, so the value is set on the job's copy.
            job.getConfiguration().setInt(TOP_N_KEY, Integer.parseInt(args[2]));
        }

        // Lets Hadoop locate the jar containing these classes when run on a cluster.
        job.setJarByClass(TopDemo.class);

        job.setMapperClass(TopMapper.class);
        job.setReducerClass(TopReducer.class);

        job.setMapOutputKeyClass(MovieBean.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(MovieBean.class);
        job.setOutputValueClass(NullWritable.class);

        job.setPartitionerClass(MyPartitioner.class);
        job.setGroupingComparatorClass(MyGroupingComparator.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : -1);
    }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值