package com.zhiyou.bd23.topn;
import java.io.File;
import java.io.IOException;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import sun.launcher.resources.launcher;
//分组topN,求出每一种类型的音乐的播放量top3的音乐名称和播放次数
public class GroupTopN {
/**
 * Mapper that re-keys every song record by its music type.
 *
 * <p>Output key: the music type (third {@code ;}-separated field of the line).
 * Output value: the song name and play count (first two fields), joined by {@code ;}.
 * Lines that do not split into exactly three fields are dropped.
 */
public static class GroupTopNMap extends Mapper<LongWritable, Text, Text, Text> {
    // Writable holders reused across map() calls.
    private final Text typeKey = new Text();
    private final Text songValue = new Text();

    /**
     * Emits {@code (type, "name;playCount")} for one input line.
     *
     * @param key     numeric key supplied with the line; records whose key is not
     *                greater than zero are ignored (presumably the key is the line's
     *                byte offset, making this a header-row skip -- confirm against
     *                the job's input format)
     * @param value   the raw input line
     * @param context sink for the emitted pair
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        if (key.get() <= 0) {
            return;
        }

        String[] fields = value.toString().trim().split(";");
        if (fields.length != 3) {
            // Not a "name;playCount;type" record.
            return;
        }

        typeKey.set(fields[2]);
        songValue.set(fields[0] + ";" + fields[1]);
        context.write(typeKey, songValue);
    }
}
/**
 * Reducer that, for every music type, emits the songs holding the three highest
 * distinct play counts.
 *
 * <p>Each incoming value is expected to look like {@code name;playCount}. Songs that
 * share a play count are merged into one comma-separated entry, so a single output
 * line may name several songs. Output pairs are {@code (type, "names;playCount")},
 * written from the highest play count to the lowest.
 *
 * <p>Values whose play count is missing or not an integer are skipped and tallied in
 * the {@code GroupTopN / MALFORMED_RECORDS} counter instead of failing the task.
 */
public static class GroupTopNReduce extends Reducer<Text, Text, Text, Text> {
    /** Number of distinct play counts kept per music type. */
    private static final int TOP_N = 3;
    private static final String COUNTER_GROUP = "GroupTopN";
    private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

    // Writable holder reused across output records.
    private final Text outputValue = new Text();
    /** Play count -> comma-joined song names; cleared and reused for every key. */
    private final TreeMap<Integer, String> top3 = new TreeMap<Integer, String>();

    /**
     * Selects and writes the top entries for one music type.
     *
     * @param key     the music type
     * @param values  {@code name;playCount} records belonging to that type
     * @param context sink for the emitted pairs and for the malformed-record counter
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        // State from the previous key must not leak into this one.
        top3.clear();

        for (Text value : values) {
            String[] infos = value.toString().split(";");
            // split() drops trailing empty fields, so "name;" yields a single element.
            Integer playCount = null;
            if (infos.length >= 2) {
                playCount = parsePlayCount(infos[1]);
            }
            if (playCount == null) {
                context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
                continue;
            }
            offer(playCount, infos[0]);
        }

        // Highest play count first.
        for (Integer playCount : top3.descendingKeySet()) {
            outputValue.set(top3.get(playCount) + ";" + playCount);
            context.write(key, outputValue);
        }
    }

    /** Parses a play count, returning {@code null} when the text is not a valid integer. */
    private static Integer parsePlayCount(String raw) {
        try {
            return Integer.valueOf(raw.trim());
        } catch (NumberFormatException notANumber) {
            return null;
        }
    }

    /** Adds one song to the running top list, evicting the lowest play count on overflow. */
    private void offer(Integer playCount, String songName) {
        String sameCountNames = top3.get(playCount);
        if (sameCountNames != null) {
            // Tie: append to the songs already recorded for this play count.
            top3.put(playCount, sameCountNames + "," + songName);
            return;
        }
        top3.put(playCount, songName);
        if (top3.size() > TOP_N) {
            // May evict the entry just added when it is the smallest.
            top3.pollFirstEntry();
        }
    }
}
/**
 * Configures and runs the grouped top-N job, then terminates the JVM with exit
 * status 0 when the job completes successfully and 1 otherwise.
 *
 * <p>Input is read from {@code /musictype.txt}; results go to
 * {@code /grouptopnoutput}, which is deleted recursively beforehand.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if job setup, the file-system call, or job execution fails
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    final Path inputPath = new Path("/musictype.txt");
    final Path outputPath = new Path("/grouptopnoutput");

    final Job topNJob = Job.getInstance(configuration);
    topNJob.setJobName("分组求topn");
    topNJob.setJarByClass(GroupTopN.class);

    // Map and reduce stages, both producing Text/Text pairs.
    topNJob.setMapperClass(GroupTopNMap.class);
    topNJob.setReducerClass(GroupTopNReduce.class);
    topNJob.setOutputKeyClass(Text.class);
    topNJob.setOutputValueClass(Text.class);
    topNJob.setNumReduceTasks(2);

    // Clear any existing output directory before pointing the job at it.
    outputPath.getFileSystem(configuration).delete(outputPath, true);
    FileInputFormat.addInputPath(topNJob, inputPath);
    FileOutputFormat.setOutputPath(topNJob, outputPath);

    final boolean succeeded = topNJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
}
// Grouped TopN
// (Residue from the source web page: "latest recommended article published 2023-07-04 20:07:29")