movie.txt
{"movie":"1193","rate":"5","timeStamp":"978300760","uid":"1"}
{"movie":"661","rate":"3","timeStamp":"978302109","uid":"2"}
{"movie":"1193","rate":"3","timeStamp":"978300760","uid":"3"}
{"movie":"661","rate":"1","timeStamp":"978302109","uid":"1"}
{"movie":"1193","rate":"2","timeStamp":"978300760","uid":"2"}
{"movie":"661","rate":"4","timeStamp":"978302109","uid":"3"}
{"movie":"1193","rate":"5","timeStamp":"978300760","uid":"3"}
{"movie":"661","rate":"1","timeStamp":"978302109","uid":"1"}
{"movie":"1193","rate":"3","timeStamp":"978300760","uid":"2"}
{"movie":"661","rate":"4","timeStamp":"978302109","uid":"1"}
{"movie":"1193","rate":"5","timeStamp":"978300760","uid":"3"}
{"movie":"661","rate":"1","timeStamp":"978302109","uid":"1"}
{"movie":"1193","rate":"2","timeStamp":"978300760","uid":"2"}
{"movie":"661","rate":"4","timeStamp":"978302109","uid":"5"}
计算topN（按用户统计评分排行）
按用户（uid）分组，显示记录时按照每个用户的评分（rate）从高到低排序显示，期望输出如下：
{"movie":"1193","rate":5.0,"timeStamp":978300760,"uid":"1"}
{"movie":"661","rate":4.0,"timeStamp":978302109,"uid":"1"}
{"movie":"661","rate":1.0,"timeStamp":978302109,"uid":"1"}
{"movie":"661","rate":1.0,"timeStamp":978302109,"uid":"1"}
{"movie":"661","rate":1.0,"timeStamp":978302109,"uid":"1"}
{"movie":"661","rate":3.0,"timeStamp":978302109,"uid":"2"}
{"movie":"1193","rate":3.0,"timeStamp":978300760,"uid":"2"}
{"movie":"1193","rate":2.0,"timeStamp":978300760,"uid":"2"}
{"movie":"1193","rate":2.0,"timeStamp":978300760,"uid":"2"}
{"movie":"1193","rate":5.0,"timeStamp":978300760,"uid":"3"}
{"movie":"1193","rate":5.0,"timeStamp":978300760,"uid":"3"}
{"movie":"661","rate":4.0,"timeStamp":978302109,"uid":"3"}
{"movie":"1193","rate":3.0,"timeStamp":978300760,"uid":"3"}
{"movie":"661","rate":4.0,"timeStamp":978302109,"uid":"5"}
代码实现
package com.doit.demo08;
import com.doit.demo05.MovieBean;
import com.google.gson.Gson;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.*;
public class Movie {
private static class MovieMapper extends Mapper<LongWritable,Text,Text, MovieBean>{
Gson gs = new Gson();
Text k2 = new Text();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
try {
MovieBean movieBean = gs.fromJson(value.toString(), MovieBean.class);
String uid = movieBean.getUid();
k2.set(uid);
context.write(k2,movieBean);
} catch (Exception e) {
e.printStackTrace();
}
}
}
private static class MovieReducer extends Reducer<Text,MovieBean, NullWritable,Text>{
Text v3 = new Text();
@Override
protected void reduce(Text key, Iterable<MovieBean> values,Context context) throws IOException, InterruptedException {
ArrayList<MovieBean> list = new ArrayList<>();
for (MovieBean value : values) {
MovieBean mb = new MovieBean();
mb.setUid(value.getUid());
mb.setMovie(value.getMovie());
mb.setRate(value.getRate());
mb.setTimeStamp(value.getTimeStamp());
list.add(mb);
}
Collections.sort(list, new Comparator<MovieBean>() {
@Override
public int compare(MovieBean o1, MovieBean o2) {
return Double.compare(o2.getRate(),o1.getRate());
}
});
for (MovieBean movieBean : list) {
Gson gs = new Gson();
v3.set(gs.toJson(movieBean));
context.write(NullWritable.get(),v3);
}
}
}
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "movie2");
job.setMapperClass(MovieMapper.class);
job.setReducerClass(MovieReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(MovieBean.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Text.class);
FileInputFormat.setInputPaths(job,new Path("d:\\work\\abc\\movie.txt"));
FileOutputFormat.setOutputPath(job,new Path("d:\\work\\abc\\out_put7"));
job.waitForCompletion(true);
}
}