数据
{"movie":"1193","rate":"5","timeStamp":"978300760","uid":"1"}
{"movie":"661","rate":"3","timeStamp":"978302109","uid":"1"}
{"movie":"914","rate":"3","timeStamp":"978301968","uid":"1"}
{"movie":"3408","rate":"4","timeStamp":"978300275","uid":"1"}
{"movie":"2355","rate":"5","timeStamp":"978824291","uid":"1"}
{"movie":"1197","rate":"3","timeStamp":"978302268","uid":"1"}
{"movie":"1287","rate":"5","timeStamp":"978302039","uid":"1"}
需求
求出每部电影评分的平均值,将平均值从高到低排列,并求出平均值最高的前20条数据
需求分析
将文件中的数据的属性封装,进行序列化和反序列化
Map读出数据,Reduce中setup建立一个小顶堆,reduce部分求平均值并放入小顶堆
将电影ID,平均值封装到一个新对象里边,输出数据
代码
MovieBean.java
/**
 * One rating record as read from the input file, e.g.
 * {"movie":"1193","rate":"5","timeStamp":"978300760","uid":"1"}.
 *
 * <p>Serialized field order is: movie, rate, timeStamp, uid. {@link #write} and
 * {@link #readFields} must stay in that same order.
 */
public class MovieBean implements Writable {

    private String movie;
    private int rate;
    private String timeStamp;
    private String uid;

    /** Assigns all four fields in one call. */
    public void Set(String movie, int rate, String timeStamp, String uid) {
        this.movie = movie;
        this.rate = rate;
        this.timeStamp = timeStamp;
        this.uid = uid;
    }

    // ---- serialization ----

    /** Writes the fields to {@code out} as UTF string, int, UTF string, UTF string. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(movie);
        out.writeInt(rate);
        out.writeUTF(timeStamp);
        out.writeUTF(uid);
    }

    /** Restores the fields from {@code in}, in the order {@link #write} emitted them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        movie = in.readUTF();
        rate = in.readInt();
        timeStamp = in.readUTF();
        uid = in.readUTF();
    }

    // ---- accessors ----

    public String getMovie() {
        return movie;
    }

    public void setMovie(String movie) {
        this.movie = movie;
    }

    public int getRate() {
        return rate;
    }

    public void setRate(int rate) {
        this.rate = rate;
    }

    public String getTimeStamp() {
        return timeStamp;
    }

    public void setTimeStamp(String timeStamp) {
        this.timeStamp = timeStamp;
    }

    public String getUid() {
        return uid;
    }

    public void setUid(String uid) {
        this.uid = uid;
    }

    /** Returns a bracketed, comma-separated dump of all four fields. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("MovieBean [movie=");
        text.append(movie);
        text.append(", rate=").append(rate);
        text.append(", timeStamp=").append(timeStamp);
        text.append(", uid=").append(uid);
        text.append("]");
        return text.toString();
    }
}
MovieAvgBean.java
/**
 * Pairs a movie id with the average of its ratings.
 *
 * <p>Serialized field order is: avg (float), then the movie id (UTF string).
 */
public class MovieAvgBean implements Writable {

    private float avg;
    private String movieId;

    /** Assigns both fields in one call. */
    public void Set(float avg, String movieId) {
        this.avg = avg;
        this.movieId = movieId;
    }

    /** Writes the average followed by the movie id to {@code out}. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeFloat(avg);
        out.writeUTF(movieId);
    }

    /** Restores the fields from {@code in}, in the order {@link #write} emitted them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        avg = in.readFloat();
        movieId = in.readUTF();
    }

    public float getAvg() {
        return avg;
    }

    public void setAvg(float avg) {
        this.avg = avg;
    }

    public String getMovieId() {
        return movieId;
    }

    public void setMovieId(String movieId) {
        this.movieId = movieId;
    }

    /** Returns a bracketed dump of the average and the movie id. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("MovieAvgBean [avg=");
        text.append(avg);
        text.append(", MovieId=").append(movieId);
        text.append("]");
        return text.toString();
    }
}
MapReduce部分
public class MovieAvgTopn {
public static class MapTask extends Mapper<LongWritable, Text, Text, MovieBean>{
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, MovieBean>.Context context)
throws IOException, InterruptedException {
ObjectMapper mapper = new ObjectMapper();
MovieBean bean = mapper.readValue(value.toString(), MovieBean.class);
String movie = bean.getMovie();
context.write(new Text(movie), bean);
}
}
public static class ReduceTask extends Reducer<Text, MovieBean, MovieAvgBean, NullWritable>{
TreeSet<MovieAvgBean> tree = null;
@Override
protected void setup(Reducer<Text, MovieBean, MovieAvgBean, NullWritable>.Context context)
throws IOException, InterruptedException {
tree = new TreeSet<>(new Comparator<MovieAvgBean>() {
@Override
public int compare(MovieAvgBean o1, MovieAvgBean o2) {
if(o1.getMovieId()==o2.getMovieId()) {
Float avg1 = o1.getAvg();
Float avg2 = o2.getAvg();
return avg1.compareTo(avg2);
}else {
return o1.getMovieId().compareTo(o2.getMovieId());
}
}
}) ;
}
@Override
protected void reduce(Text key, Iterable<MovieBean> values,
Reducer<Text, MovieBean, MovieAvgBean, NullWritable>.Context context) throws IOException, InterruptedException {
int count = 0;
float sum = 0;
float avg = 0;
for (MovieBean movieBean : values) {
sum += movieBean.getRate();
count++;
}
avg = sum/count;
MovieAvgBean avgBean = new MovieAvgBean();
avgBean.Set(avg,key.toString());
if(tree.size()<20) {
tree.add(avgBean);
}else {
MovieAvgBean first = tree.first();
if(first.getAvg()<avgBean.getAvg()) {
tree.remove(first);
tree.add(avgBean);
}
}
}
@Override
protected void cleanup(Reducer<Text, MovieBean, MovieAvgBean, NullWritable>.Context context)
throws IOException, InterruptedException {
for (MovieAvgBean movieAvgBean : tree) {
context.write(movieAvgBean, NullWritable.get());
}
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf,"movieAvgTopn");
job.setMapperClass(MapTask.class);
job.setReducerClass(ReduceTask.class);
job.setJarByClass(MovieAvgTopn.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(MovieBean.class);
job.setOutputKeyClass(MovieAvgBean.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.addInputPath(job, new Path("D:\\a\\rating.json"));
FileOutputFormat.setOutputPath(job, new Path("D:\\a\\MovieAvgTopn-out"));
//判断文件是否存在
File file = new File("D:\\a\\MovieAvgTopn-out");
if(file.exists()){
FileUtils.deleteDirectory(file);
}
boolean completion = job.waitForCompletion(true);
System.out.println(completion?"你很优秀!":"调bug");
}
}