Problem:
Find the ids of the 10 movies with the highest average rating. (Output: movieid averageRating)
Approach:
In the Mapper, extract the data from each record; key: the movie id, value: a movie-info class (implementing WritableComparable).
In the Reducer, aggregate the records for each movie, compute its average rating, and add the result to a TreeSet capped at the top N entries.
In the Reducer's cleanup method, emit the movie records held in the TreeSet.
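For reference, the Mapper below splits each input line on the double-quote character and reads fixed array indices, which assumes a MovieLens-style JSON record. A minimal sketch of that assumption (the sample values are made up, not taken from the actual dataset):
// A minimal sketch of the assumed input format (sample values are made up)
public class SplitDemo {
    public static void main(String[] args) {
        String line = "{\"movie\":\"1193\",\"rate\":\"5\",\"timeStamp\":\"978300760\",\"uid\":\"1\"}";
        String[] words = line.split("\"");
        System.out.println(words[3]); // movie id -> 1193
        System.out.println(words[7]); // rating   -> 5
    }
}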
Code:
// Movie info class: movie id, rating sum, and average rating
package Sort02;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class RecordWritable implements WritableComparable<RecordWritable> {
    private String movie = "";
    private long sum = 0L;
    private double rate = 0.0;
    // Sort by average rating in descending order; break ties by movie id
    // so that distinct movies with equal ratings are not collapsed by TreeSet.
    @Override
    public int compareTo(RecordWritable o) {
        if (this.rate < o.rate) {
            return 1;
        } else if (this.rate > o.rate) {
            return -1;
        } else {
            return Integer.parseInt(this.movie) - Integer.parseInt(o.movie);
        }
    }
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(movie);
        dataOutput.writeLong(sum);
        dataOutput.writeDouble(rate);
    }
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.movie = dataInput.readUTF();
        this.sum = dataInput.readLong();
        this.rate = dataInput.readDouble();
    }
    @Override
    public String toString() {
        return "movie='" + movie + '\'' + ", rate=" + rate;
    }
    public String getMovie() {
        return movie;
    }
    public void setMovie(String movie) {
        this.movie = movie;
    }
    public long getSum() {
        return sum;
    }
    public void setSum(long sum) {
        this.sum = sum;
    }
    public double getRate() {
        return rate;
    }
    public void setRate(double rate) {
        this.rate = rate;
    }
}
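Because compareTo orders records by descending rating, a TreeSet of RecordWritable keeps its highest-rated element first and its lowest-rated element last, which is what lets the Reducer evict with pollLast(). A minimal ordering sketch, assuming it sits in the same Sort02 package (the ids and ratings are made up):
// Ordering sketch for RecordWritable (illustrative values only)
package Sort02;
import java.util.TreeSet;
public class OrderingDemo {
    public static void main(String[] args) {
        TreeSet<RecordWritable> ts = new TreeSet<RecordWritable>();
        String[] movies = {"10", "20", "30"};
        double[] rates = {3.5, 4.8, 4.1};
        for (int i = 0; i < movies.length; i++) {
            RecordWritable rw = new RecordWritable();
            rw.setMovie(movies[i]);
            rw.setRate(rates[i]);
            ts.add(rw);
        }
        System.out.println(ts.first()); // movie='20', rate=4.8 (highest average)
        System.out.println(ts.last());  // movie='10', rate=3.5 (would be evicted by pollLast)
    }
}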
// MapReduce job: driver, custom Mapper, and custom Reducer
package Sort02;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.TreeSet;
public class MainDemo {
    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "Sort02-MainDemo");
            job.setJarByClass(MainDemo.class);
            job.setMapperClass(MyMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(RecordWritable.class);
            job.setReducerClass(MyReducer.class);
            // A global top-N needs a single reducer; this is the default,
            // but setting it explicitly guards against configuration overrides.
            job.setNumReduceTasks(1);
            job.setOutputKeyClass(RecordWritable.class);
            job.setOutputValueClass(NullWritable.class);
            FileInputFormat.addInputPath(job, new Path("C:/Users/Administrator/Desktop/Data/input/rating.json"));
            FileOutputFormat.setOutputPath(job, new Path("C:/Users/Administrator/Desktop/Data/output/03/00"));
            int exitCode = job.waitForCompletion(true) ? 0 : 1;
            System.exit(exitCode);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // Custom Mapper: emits (movie id, record carrying one rating)
    public static class MyMapper extends Mapper<Object, Text, Text, RecordWritable> {
        Text k = new Text();
        RecordWritable v = new RecordWritable();
        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String row = value.toString();
            // Splitting a JSON line on '"' puts the movie id at index 3
            // and the rating at index 7 (see the sample line above).
            String[] words = row.split("\"");
            v.setMovie(words[3]);
            v.setSum(Integer.parseInt(words[7])); // a single rating, summed up in reduce
            v.setRate(0.0);
            k.set(words[3]);
            context.write(k, v);
        }
    }
    // Custom Reducer: averages each movie's ratings and keeps the top N in a TreeSet
    public static class MyReducer extends Reducer<Text, RecordWritable, RecordWritable, NullWritable> {
        TreeSet<RecordWritable> ts = new TreeSet<RecordWritable>();
        final static int N = 10; // the problem asks for the top 10 movies
        @Override
        protected void reduce(Text key, Iterable<RecordWritable> values, Context context) throws IOException, InterruptedException {
            int cnt = 0;
            long sum = 0L;
            for (RecordWritable rw : values) {
                cnt += 1;
                sum += rw.getSum();
            }
            RecordWritable rw = new RecordWritable();
            rw.setMovie(key.toString());
            rw.setRate((sum * 1.0) / (cnt * 1.0));
            ts.add(rw);
            // The set is ordered by descending rating, so the last element
            // is the current lowest-rated movie; evict it once we exceed N.
            if (ts.size() > N) {
                ts.pollLast();
            }
        }
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Emit the surviving top-N records, highest average first
            for (RecordWritable rw : ts) {
                context.write(rw, NullWritable.get());
            }
        }
    }
}
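Usage note: with the input and output paths hardcoded in main, the job can be run directly from the IDE in local mode, or packaged and submitted with hadoop jar Sort02.jar Sort02.MainDemo (the jar name here is hypothetical). Each output line is RecordWritable.toString(), i.e. movie='<id>', rate=<average>, ordered from highest to lowest average; note that Hadoop refuses to start the job if the output directory already exists.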