分组TopN

package com.zhiyou.bd23.topn;

import java.io.File;
import java.io.IOException;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import sun.launcher.resources.launcher;

/**
 * Grouped top-N MapReduce job: for every music type, emits the songs that hold the
 * highest distinct play counts, together with those counts.
 *
 * <p>Each input line is expected to hold three {@code ;}-separated fields:
 * song name, play count, music type. Songs sharing a play count within one type are
 * reported on a single output line, their names joined with {@code ,}.
 *
 * <p>The number of distinct play counts kept per type defaults to
 * {@value #DEFAULT_TOP_N} and can be overridden through the job configuration key
 * {@value #TOP_N_KEY} (or the optional third command-line argument of {@link #main}).
 */
public class GroupTopN {
	/** Number of distinct play counts kept per music type when nothing is configured. */
	private static final int DEFAULT_TOP_N = 3;
	/** Job configuration key that overrides {@link #DEFAULT_TOP_N}. */
	private static final String TOP_N_KEY = "grouptopn.n";
	/** Separator between the fields of an input line and of a map/reduce value. */
	private static final String FIELD_SEPARATOR = ";";
	/** Counter group under which skipped records are reported. */
	private static final String COUNTER_GROUP = "GroupTopN";
	/** Input path used when no command-line argument is given. */
	private static final String DEFAULT_INPUT = "/musictype.txt";
	/** Output directory used when no command-line argument is given. */
	private static final String DEFAULT_OUTPUT = "/grouptopnoutput";

	/**
	 * Mapper emitting {@code key = music type}, {@code value = song name;play count}.
	 */
	public static class GroupTopNMap extends Mapper<LongWritable, Text, Text, Text>{
		private final Text outputKey = new Text();
		private final Text outputValue = new Text();

		/**
		 * Splits one input line and re-keys it by music type.
		 *
		 * @param key     position key of the line; with the default text input format this
		 *                is the byte offset of the line in the file
		 * @param value   the raw input line
		 * @param context sink for the emitted (type, "name;count") pair
		 */
		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			// The record at offset 0 is the first line of the file and is skipped
			// (presumably a header row -- confirm against the input data).
			if (key.get() <= 0) {
				return;
			}
			String[] fields = value.toString().trim().split(FIELD_SEPARATOR);
			// Lines that do not have exactly three fields are ignored.
			if (fields.length != 3) {
				return;
			}
			outputKey.set(fields[2]);
			outputValue.set(fields[0] + FIELD_SEPARATOR + fields[1]);
			context.write(outputKey, outputValue);
		}
	}

	/**
	 * Reducer computing, for each music type, the top-N distinct play counts and the
	 * songs that reached them.
	 */
	public static class GroupTopNReduce extends Reducer<Text, Text, Text, Text>{
		private final Text outputValue = new Text();
		/** Play count -> comma-joined song names; sorted ascending so the smallest is first. */
		private final TreeMap<Long, String> top = new TreeMap<Long, String>();
		private int topN = DEFAULT_TOP_N;

		/** Reads the configured N once per task; non-positive values fall back to the default. */
		@Override
		protected void setup(Context context) throws IOException, InterruptedException {
			int configured = context.getConfiguration().getInt(TOP_N_KEY, DEFAULT_TOP_N);
			topN = configured > 0 ? configured : DEFAULT_TOP_N;
		}

		/**
		 * Emits one line per retained play count, highest first, formatted as
		 * {@code names;count}.
		 *
		 * <p>Values whose play count is missing or not a valid integer are skipped and
		 * counted under the {@code GroupTopN/MALFORMED_PLAY_COUNT} counter instead of
		 * failing the task.
		 *
		 * @param key     the music type
		 * @param values  "song name;play count" strings for that type
		 * @param context sink for the emitted (type, "names;count") pairs
		 */
		@Override
		protected void reduce(Text key, Iterable<Text> values, Context context)
				throws IOException, InterruptedException {
			// The map is reused across keys, so it must be emptied for every group.
			top.clear();
			for (Text value : values) {
				// Limit -1 keeps a trailing empty field, so "name;" yields two fields
				// instead of silently collapsing to one.
				String[] fields = value.toString().split(FIELD_SEPARATOR, -1);
				if (fields.length < 2) {
					context.getCounter(COUNTER_GROUP, "MALFORMED_PLAY_COUNT").increment(1);
					continue;
				}
				long plays;
				try {
					plays = Long.parseLong(fields[1].trim());
				} catch (NumberFormatException e) {
					context.getCounter(COUNTER_GROUP, "MALFORMED_PLAY_COUNT").increment(1);
					continue;
				}
				// Songs with an already-seen play count share that entry.
				String names = top.get(plays);
				top.put(plays, names == null ? fields[0] : names + "," + fields[0]);
				// Adding a new distinct count may exceed N; drop the smallest one.
				if (top.size() > topN) {
					top.pollFirstEntry();
				}
			}
			for (Long plays : top.descendingKeySet()) {
				outputValue.set(top.get(plays) + FIELD_SEPARATOR + plays);
				context.write(key, outputValue);
			}
		}
	}

	/**
	 * Configures and runs the job, exiting with status 0 on success and 1 on failure.
	 * Any existing output directory is deleted before the job starts.
	 *
	 * @param args optional: {@code [0]} input path (default {@code /musictype.txt}),
	 *             {@code [1]} output directory (default {@code /grouptopnoutput}),
	 *             {@code [2]} number of distinct play counts to keep per type (default 3)
	 * @throws Exception if the job cannot be set up or run, or if {@code args[2]} is not
	 *                   an integer
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		// Must be set before the Job is created so the value reaches the tasks.
		if (args.length > 2) {
			conf.setInt(TOP_N_KEY, Integer.parseInt(args[2].trim()));
		}
		Job job = Job.getInstance(conf);
		job.setJarByClass(GroupTopN.class);
		job.setJobName("分组求topn");
		job.setMapperClass(GroupTopNMap.class);
		job.setReducerClass(GroupTopNReduce.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		Path input = new Path(args.length > 0 ? args[0] : DEFAULT_INPUT);
		Path outputDir = new Path(args.length > 1 ? args[1] : DEFAULT_OUTPUT);
		// Remove output left over from a previous run so the job can write to it.
		outputDir.getFileSystem(conf).delete(outputDir, true);
		FileInputFormat.addInputPath(job, input);
		FileOutputFormat.setOutputPath(job, outputDir);
		job.setNumReduceTasks(2);
		System.exit(job.waitForCompletion(true)?0:1);
	}
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值