hadoop1-TopK问题实现之优化设计

此次 map 的设计引入了一个自定义的 list 容器,使每个 map 任务结束时只需输出本地的前 K 名即可,从而减少传给 reduce 的数据量。

下面直接给出程序代码(引言可参考上一篇):

  

package test;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * MapReduce job that reports the K largest integers found in the input.
 *
 * <p>Each input line is expected to hold one integer. Every map task keeps
 * only its own K largest values and emits them when the task finishes; a
 * single reducer merges those candidates and writes one line per result in
 * the form {@code value<TAB>rank}, rank 1 being the largest value.
 *
 * <p>Command line: {@code TopK2 <in> <out> <top_num>}.
 */
public class TopK2 {
	/** Configuration key that carries K from the driver to the tasks. */
	private static final String TOP_NUM_KEY = "top_num";

	/** K used by a task when the configuration has no value for {@link #TOP_NUM_KEY}. */
	private static final int DEFAULT_TOP_NUM = 5;

	/** Counter group / name used to report input lines that are not integers. */
	private static final String COUNTER_GROUP = "TopK2";
	private static final String SKIPPED_COUNTER = "SKIPPED_RECORDS";

	/**
	 * Mapper that buffers the K largest integers it has seen and emits them
	 * only once, from {@link #cleanup}, instead of emitting every record.
	 */
	public static class Map extends Mapper<Object, Text, MyKey, NullWritable> {
		// Instance state (not static): each task object owns its own buffer, so
		// tasks that happen to share a JVM cannot overwrite each other's data.
		private MyList list;

		/** Creates the bounded buffer using the K stored in the job configuration. */
		@Override
		protected void setup(Context context)
				throws IOException, InterruptedException {
			list = new MyList(context.getConfiguration().getInt(TOP_NUM_KEY, DEFAULT_TOP_NUM));
		}

		/**
		 * Parses one input line as an integer and offers it to the buffer.
		 * Lines that are not valid integers are skipped and counted.
		 */
		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			try {
				// trim() so that surrounding whitespace does not reject a valid number
				list.add(Integer.parseInt(value.toString().trim()));
			} catch (NumberFormatException e) {
				// Best effort: ignore the malformed line but make it visible in the job counters.
				context.getCounter(COUNTER_GROUP, SKIPPED_COUNTER).increment(1);
			}
		}

		/** Runs when the map task ends: emits the buffered candidates as keys. */
		@Override
		protected void cleanup(Context context)
				throws IOException, InterruptedException {
			for (Integer item : list) {
				context.write(new MyKey(item), NullWritable.get());
			}
			list.clear();
		}
	}

	/**
	 * Reducer that merges the candidates of all map tasks into the final
	 * top-K list and writes it, with ranks, from {@link #cleanup}.
	 */
	public static class Reduce extends Reducer<MyKey, NullWritable, Text, NullWritable> {
		// Reused output key to avoid allocating a Text per output line.
		private final Text k = new Text();

		// Instance state for the same reason as in Map.
		private MyList list;

		/** Creates the bounded buffer using the K stored in the job configuration. */
		@Override
		protected void setup(Context context)
				throws IOException, InterruptedException {
			list = new MyList(context.getConfiguration().getInt(TOP_NUM_KEY, DEFAULT_TOP_NUM));
		}

		/**
		 * Offers the key's number to the buffer once per occurrence.
		 *
		 * <p>Equal numbers emitted by the mappers are grouped into one call, with
		 * one element in {@code values} per occurrence. Adding the number for each
		 * element keeps duplicates, which matches what the mappers buffer.
		 */
		@Override
		protected void reduce(MyKey key, Iterable<NullWritable> values, Context context)
				throws IOException, InterruptedException {
			int num = key.getNum();
			for (NullWritable ignored : values) {
				list.add(num);
			}
		}

		/** Writes the final list; the buffer is kept in descending order, so index 0 is rank 1. */
		@Override
		protected void cleanup(Context context)
				throws IOException, InterruptedException {
			for (int i = 0; i < list.size(); i++) {
				k.set(list.get(i) + "\t" + (i + 1));
				context.write(k, NullWritable.get());
			}
			list.clear();
		}
	}

	/**
	 * Job driver.
	 *
	 * @param args generic Hadoop options followed by {@code <in> <out> <top_num>}
	 * @throws Exception if the job cannot be configured or submitted
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();

		// GenericOptionsParser consumes options such as -D/-files; only the
		// remaining arguments are the job's own positional parameters.
		String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
		if (otherArgs.length != 3) {
			System.err.println("Usage: TopK2 <in> <out> <top_num>");
			System.exit(2);
		}

		// Third argument: how many of the largest values to report.
		// Validate it here so a bad value fails fast instead of failing every task.
		int topNum;
		try {
			topNum = Integer.parseInt(otherArgs[2].trim());
		} catch (NumberFormatException e) {
			topNum = -1;
		}
		if (topNum < 0) {
			System.err.println("top_num must be a non-negative integer: " + otherArgs[2]);
			System.exit(2);
		}
		conf.setInt(TOP_NUM_KEY, topNum);

		Job job = new Job(conf, "TopK2");
		job.setJarByClass(TopK2.class);

		job.setMapperClass(Map.class);
		job.setReducerClass(Reduce.class);
		// A global top-K needs every candidate in the same reducer.
		job.setNumReduceTasks(1);

		job.setMapOutputKeyClass(MyKey.class);
		job.setMapOutputValueClass(NullWritable.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(NullWritable.class);

		FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
		FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}

	/**
	 * Map output key wrapping one integer. Its natural order is descending,
	 * so the reducer receives the largest numbers first.
	 */
	private static class MyKey implements WritableComparable<MyKey> {

		private int num;

		/** No-argument constructor; {@link #readFields} fills in the value afterwards. */
		public MyKey() {
		}

		public MyKey(int num) {
			this.num = num;
		}

		public int getNum() {
			return num;
		}

		@Override
		public void readFields(DataInput in) throws IOException {
			num = in.readInt();
		}

		@Override
		public void write(DataOutput out) throws IOException {
			out.writeInt(num);
		}

		/**
		 * Reverse numeric order. Uses explicit comparisons rather than
		 * subtraction, which overflows for operands of opposite sign.
		 */
		@Override
		public int compareTo(MyKey o) {
			if (this.num == o.num) {
				return 0;
			}
			return (this.num > o.num) ? -1 : 1;
		}

		/** Two keys are equal when they wrap the same number (consistent with compareTo). */
		@Override
		public boolean equals(Object obj) {
			if (this == obj) {
				return true;
			}
			if (!(obj instanceof MyKey)) {
				return false;
			}
			return num == ((MyKey) obj).num;
		}

		/** Value-based hash so that hash partitioning sends equal keys to the same partition. */
		@Override
		public int hashCode() {
			return num;
		}

		@Override
		public String toString() {
			return Integer.toString(num);
		}
	}

	/**
	 * Bounded list holding at most {@code cont} integers: the largest ones
	 * offered so far, kept sorted in descending order (duplicates allowed).
	 */
	private static class MyList extends ArrayList<Integer> {
		private static final long serialVersionUID = 1L;

		// Maximum number of elements retained; values <= 0 retain nothing.
		private final int cont;

		/**
		 * @param num maximum number of elements to retain
		 */
		public MyList(int num) {
			super();
			this.cont = num;
		}

		/**
		 * Offers a value. It is stored if the list is not yet full, or if it is
		 * larger than the smallest retained value (which is then dropped).
		 *
		 * @param value candidate value
		 */
		public void add(int value) {
			if (cont <= 0) {
				return;
			}
			if (size() >= cont) {
				int last = size() - 1;
				// The list is descending, so the last element is the current minimum.
				if (value <= get(last)) {
					return;
				}
				remove(last);
			}
			Integer boxed = Integer.valueOf(value);
			int pos = Collections.binarySearch(this, boxed, Collections.<Integer>reverseOrder());
			if (pos < 0) {
				// Not found: binarySearch returns -(insertionPoint) - 1.
				pos = -pos - 1;
			}
			super.add(pos, boxed);
		}
	}
}

 计算结果:

计算结果与上一篇相同,这里不再重复列出。

转载于:https://www.cnblogs.com/jsunday/p/3807328.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值