高效 TopN 案例：输出每个用户评分最高的 N 条电影记录
1) 用户自定义 bean 作为 KEY，并指定排序规则
2) 用户自定义分区器，按照用户指定的属性将数据分发到不同的 reduce 端
3) 自定义分组比较器，按指定的属性分组后迭代输出数据
自定义KEY的排序
/**
- 用户自定义Bean作为KEY 指定排序规则 , 实现hdp的序列化
- @author DOIT_HANG_GE
- @version 2019年2月27日 WritableComparable 可自定义排序的序列化bean
*/
public class MovieBean implements WritableComparable {
private String movie ;
private int rate ;
private long timeStamp ;
private int uid ;
public void set(String movie, int rate, long timeStamp, int uid) {
this.movie = movie;
this.rate = rate;
this.timeStamp = timeStamp;
this.uid = uid;
}
public String getMovie() {
return movie;
}
public void setMovie(String movie) {
this.movie = movie;
}
public int getRate() {
return rate;
}
public void setRate(int rate) {
this.rate = rate;
}
public long getTimeStamp() {
return timeStamp;
}
public void setTimeStamp(long timeStamp) {
this.timeStamp = timeStamp;
}
public int getUid() {
return uid;
}
public void setUid(int uid) {
this.uid = uid;
}
@Override
public String toString() {
return “MovieBean [movie=” + movie + “, rate=” + rate + “, timeStamp=” + timeStamp + “, uid=” + uid + “]”;
}
@Override
public void write(DataOutput out) throws IOException {
out.writeUTF(movie);
out.writeInt(rate);
out.writeLong(timeStamp);
out.writeInt(uid);
}
@Override
public void readFields(DataInput in) throws IOException {
this.movie = in.readUTF() ;
this.rate = in.readInt() ;
this.timeStamp = in.readLong() ;
this.uid = in.readInt() ;
}
@Override
public int compareTo(MovieBean o) {
//用户uid相同 按照分数降序排列
return Integer.compare(this.uid, o.getUid())==0?Integer.compare(o.getRate(), this.rate):Integer.compare(this.uid, o.getUid());
}
}
自定义分区
/**
- 自定义数据分区的逻辑按照用户的uid进行分区<MovieBean, NullWritable> 泛型 map端输出的数据
- @author DOIT_HANG_GE
- @version 2019年2月27日
/
public class MyPartitioner extends Partitioner<MovieBean, NullWritable> {
@Override
public int getPartition(MovieBean key, NullWritable value, int numPartitions) {
return (key.getUid() + “”.hashCode() & Integer.MAX_VALUE) % numPartitions;
}
}
自定义reduce端的分组迭代GroupingComparator
/**
 * Grouping comparator for the reduce side: keys that compare as equal here
 * are iterated as one group.
 *
 * @author DOIT_HANG_GE
 * @version 2019-02-27
 */
public class MyGroupingComparator extends WritableComparator {

    /**
     * Registers {@code MovieBean} as the key class; the {@code true} flag asks the
     * parent comparator to deserialize keys into WritableComparable objects.
     */
    public MyGroupingComparator() {
        super(MovieBean.class, true);
    }

    /**
     * Compares two adjacent keys by {@code uid} only, so that all records of
     * one user are treated as the same group regardless of their other fields.
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        int leftUid = ((MovieBean) a).getUid();
        int rightUid = ((MovieBean) b).getUid();
        return Integer.compare(leftUid, rightUid);
    }
}
MAPREDUCE
/**
 * MapReduce driver that outputs, for every user, that user's top-N rated movie records.
 *
 * <p>The mapper parses each input line as JSON into a {@code MovieBean} key. The job
 * uses {@code MyPartitioner} and {@code MyGroupingComparator}, and the reducer emits
 * the first N keys of each group.
 *
 * <p>Usage: {@code TopDemo [inputPath [outputPath [topN]]]}. Every argument is optional
 * and falls back to the built-in default.
 */
public class TopDemo {

    /** Configuration key holding the number of records to emit per group. */
    private static final String TOP_N_KEY = "topdemo.top.n";
    private static final int DEFAULT_TOP_N = 20;
    private static final String DEFAULT_INPUT = "d:/data/movie/input";
    private static final String DEFAULT_OUTPUT = "d:/data/movie/output3";
    /** Counter group under which skipped input records are reported. */
    private static final String COUNTER_GROUP = "TopDemo";

    /** Parses one JSON line per call and emits the resulting bean as the key. */
    public static class TopMapper extends Mapper<LongWritable, Text, MovieBean, NullWritable> {

        private final Gson gs = new Gson();

        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, MovieBean, NullWritable>.Context context)
                throws IOException, InterruptedException {
            String str = value.toString();
            try {
                MovieBean movieBean = gs.fromJson(str, MovieBean.class);
                // Gson yields null for empty input, and a record without a movie
                // cannot be serialized (writeUTF rejects null): skip both.
                if (movieBean == null || movieBean.getMovie() == null) {
                    context.getCounter(COUNTER_GROUP, "INCOMPLETE_RECORDS").increment(1);
                    return;
                }
                context.write(movieBean, NullWritable.get());
            } catch (JsonSyntaxException e) {
                // Best effort: a malformed line is counted and skipped, not fatal.
                context.getCounter(COUNTER_GROUP, "MALFORMED_RECORDS").increment(1);
            }
        }
    }

    /** Emits at most N keys for each group handed to {@code reduce}. */
    public static class TopReducer extends Reducer<MovieBean, NullWritable, MovieBean, NullWritable> {

        private int topN = DEFAULT_TOP_N;

        @Override
        protected void setup(Reducer<MovieBean, NullWritable, MovieBean, NullWritable>.Context context)
                throws IOException, InterruptedException {
            topN = context.getConfiguration().getInt(TOP_N_KEY, DEFAULT_TOP_N);
            if (topN < 1) {
                throw new IllegalArgumentException(TOP_N_KEY + " must be >= 1 but was " + topN);
            }
        }

        @Override
        protected void reduce(MovieBean mb, Iterable<NullWritable> iters,
                Reducer<MovieBean, NullWritable, MovieBean, NullWritable>.Context context)
                throws IOException, InterruptedException {
            int emitted = 0;
            // The key must be written inside the loop: the framework updates mb to the
            // current record of the group as the values are iterated.
            for (NullWritable nullWritable : iters) {
                context.write(mb, nullWritable);
                emitted++;
                if (emitted >= topN) {
                    return;
                }
            }
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args optional: {@code args[0]} input path, {@code args[1]} output path,
     *             {@code args[2]} number of records to keep per user
     * @throws Exception if the job cannot be set up or submitted
     */
    public static void main(String[] args) throws Exception {
        String input = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String output = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        Configuration conf = new Configuration();
        if (args.length > 2) {
            // Must be set before Job.getInstance, which takes its own copy of conf.
            conf.setInt(TOP_N_KEY, Integer.parseInt(args[2]));
        }

        Job job = Job.getInstance(conf);
        // Lets Hadoop locate the jar containing these classes when run on a cluster.
        job.setJarByClass(TopDemo.class);

        job.setMapperClass(TopMapper.class);
        job.setReducerClass(TopReducer.class);

        job.setMapOutputKeyClass(MovieBean.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(MovieBean.class);
        job.setOutputValueClass(NullWritable.class);

        job.setPartitionerClass(MyPartitioner.class);
        job.setGroupingComparatorClass(MyGroupingComparator.class);

        FileInputFormat.setInputPaths(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : -1);
    }
}