// MapReduce top-K implementation

package com.sl.hadoop;

import java.io.IOException;
import java.util.Iterator;
import java.util.NavigableMap;
import java.util.Map.Entry;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * TopK: input lines are "date temperature" (space-separated); finds the K days with
 * the highest temperatures and outputs them as "date temperature" pairs.
 *
 * Approach: both the mapper and the reducer keep a {@link TreeMap} keyed by
 * temperature. TreeMap keeps keys in ascending order, so whenever the map grows past
 * K entries the smallest key ({@code firstKey()}) is evicted, leaving only the K
 * largest temperatures seen so far. Each mapper emits just its local top K in
 * {@code cleanup()}, so the reducer only has to merge (#mappers &times; K) records
 * instead of the whole dataset.
 *
 * NOTE(review): because temperature is the TreeMap KEY, two days with the same
 * temperature collide and the earlier date is silently overwritten. If ties must be
 * preserved, the value should be a list of dates — confirm the requirement.
 */
public class TopK {

	/** Mapper: accumulates a per-split top K (temperature -> date), emitted in cleanup(). */
	public static class topMap extends Mapper<Object, Text, Text, IntWritable> {

		// Local top-K buffer; ascending by temperature, so firstKey() is the smallest.
		TreeMap<Integer, String> top5 = new TreeMap<Integer, String>();

		/** Number of top records to keep. */
		public static final int K = 5;

		@Override
		protected void map(Object key, Text value, Mapper<Object, Text, Text, IntWritable>.Context context)
				throws IOException, InterruptedException {
			String line = value.toString().trim();
			if (line.isEmpty()) {
				return; // skip blank lines instead of failing on split[1]
			}
			String[] split = line.split(" ");
			if (split.length < 2) {
				return; // skip malformed records ("date temperature" expected)
			}
			// Temperature is the key so TreeMap ordering does the ranking; date is the value.
			top5.put(Integer.valueOf(split[1]), split[0]);
			if (top5.size() > K) {
				// Evict the smallest temperature, keeping only the K largest.
				top5.remove(top5.firstKey());
			}
		}

		@Override
		protected void cleanup(Mapper<Object, Text, Text, IntWritable>.Context context)
				throws IOException, InterruptedException {
			// Emit this mapper's local top K as (date, temperature).
			for (Entry<Integer, String> entry : top5.entrySet()) {
				context.write(new Text(entry.getValue()), new IntWritable(entry.getKey()));
			}
		}

	}

	/** Reducer: merges all mappers' local top-K lists into the global top K. */
	public static class topReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

		// Global top-K buffer; same eviction scheme as the mapper.
		TreeMap<Integer, String> tops = new TreeMap<Integer, String>();

		/** Number of top records to keep; must match the mapper's K. */
		public static final int K = 5;

		@Override
		protected void reduce(Text arg0, Iterable<IntWritable> arg1,
				Reducer<Text, IntWritable, Text, IntWritable>.Context arg2) throws IOException, InterruptedException {
			for (IntWritable value : arg1) {
				tops.put(value.get(), arg0.toString());
				if (tops.size() > K) {
					// Evict the smallest temperature, keeping only the K largest.
					tops.remove(tops.firstKey());
				}
			}
		}

		@Override
		protected void cleanup(Reducer<Text, IntWritable, Text, IntWritable>.Context context)
				throws IOException, InterruptedException {
			// Emit in descending temperature order (hottest day first).
			NavigableMap<Integer, String> descendingMap = tops.descendingMap();
			for (Entry<Integer, String> entry : descendingMap.entrySet()) {
				context.write(new Text(entry.getValue()), new IntWritable(entry.getKey()));
			}
		}
	}

	/**
	 * Job driver: reads /testtop.txt, writes the global top K to /outTop,
	 * deleting any previous output directory first (Hadoop refuses to overwrite).
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		Path outPath = new Path("/outTop");
		FileSystem fs = FileSystem.get(conf);
		if (fs.exists(outPath)) {
			fs.delete(outPath, true);
		}
		Job job = Job.getInstance(conf);
		job.setJobName("toptest");
		job.setJarByClass(TopK.class);
		job.setMapperClass(topMap.class);
		job.setReducerClass(topReduce.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setOutputKeyClass(Text.class);
		// BUG FIX: the original called setOutputKeyClass twice; the second call must
		// declare the output VALUE class, otherwise the value type is never configured.
		job.setOutputValueClass(IntWritable.class);
		FileInputFormat.addInputPath(job, new Path("/testtop.txt"));
		FileOutputFormat.setOutputPath(job, outPath);
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}

}

 
