2019-9-4 [MapReduce] (400MB以下)小数据:查找TOP5

/**
 * MyMRRunProcess.java
 * com.hnxy.mr
 * Copyright (c) 2019, ZIMO (子墨). All rights reserved.
 *
 * <p>MapReduce job for small inputs: counts how often each tab-separated token
 * occurs in the input and writes the {@code TOP_N} (5) most frequent tokens
 * together with their counts.</p>
 *
 * <p>All per-token totals are held in reducer memory until {@code cleanup},
 * so the job is only suitable when the number of distinct tokens fits in the
 * reducer heap.</p>
 *
 * @author   ZIMO
 * @Date	 2019-09-04
 */
public class MyMRRunProcess extends Configured implements Tool{

	/**
	 * Mapper: splits every input line on TAB and emits {@code (token, 1)}
	 * for each resulting token.
	 * @author   ZIMO
	 * @Date	 2019-09-04
	 */
	private static class MyMapper extends Mapper<LongWritable, Text, Text,IntWritable > {
		
		// Output objects are reused across calls; the value is always 1 (one occurrence).
		private final Text outkey = new Text();
		private final IntWritable outval = new IntWritable(1);
		
		/**
		 * Emits one {@code (token, 1)} pair per tab-separated token of the line.
		 *
		 * @param key     input key supplied by the input format (unused)
		 * @param value   one line of input text
		 * @param context sink for the emitted pairs
		 */
		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
				throws IOException, InterruptedException {
			// String.split never returns null; an empty result simply skips the loop.
			for (String token : value.toString().split("\t")) {
				outkey.set(token);
				context.write(outkey, outval);
			}
		}
	}
	
	/**
	 * Reducer: sums the counts of every token, remembers the totals, and in
	 * {@code cleanup} writes only the {@code TOP_N} tokens with the highest totals.
	 * @author   ZIMO
	 * @Date	 2019-09-04
	 */
	private static class MyReduce extends Reducer<Text,IntWritable, Text, LongWritable> {
		
		/** Maximum number of records written to the output. */
		private static final int TOP_N = 5;
		
		// Reused output objects.
		private final LongWritable outval = new LongWritable();
		private final Text outkey = new Text();
		// token -> total count, filled by reduce() and consumed by cleanup()
		private final Map<String,Long> map = new HashMap<String,Long>();
		
		/**
		 * Sums all counts of one token and stores the total; nothing is
		 * written to the output here.
		 *
		 * @param key     the token
		 * @param values  the partial counts emitted for the token
		 * @param context reducer context (unused in this method)
		 */
		@Override
		protected void reduce(Text key, Iterable<IntWritable> values,
				Reducer<Text, IntWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {
			// primitive accumulator avoids boxing on every addition
			long sum = 0L;
			for (IntWritable count : values) {
				sum += count.get();
			}
			// key is copied via toString() because the Text instance is not retained
			map.put(key.toString(), Long.valueOf(sum));
		} 
		
		/**
		 * Called once after the last {@code reduce} call: sorts the collected
		 * totals by count (descending) and writes at most {@code TOP_N} records.
		 * Writes fewer records when fewer distinct tokens were seen.
		 *
		 * @param context sink for the final records
		 */
		@Override
		protected void cleanup(Reducer<Text, IntWritable, Text, LongWritable>.Context context)
				throws IOException, InterruptedException {
			// A map cannot be sorted by value directly, so copy its entries into a list.
			List<Map.Entry<String, Long>> list = new LinkedList<Map.Entry<String, Long>>(map.entrySet());
			Collections.sort(list,new Comparator<Map.Entry<String, Long>>() {
				@Override
				public int compare(Map.Entry<String, Long> o1, Map.Entry<String, Long> o2) {
					// highest count first
					int byCount = o2.getValue().compareTo(o1.getValue());
					if (byCount != 0) {
						return byCount;
					}
					// equal counts: order by token so the output does not depend on HashMap iteration order
					return o1.getKey().compareTo(o2.getKey());
				}
			});
			// Iterate instead of list.get(i): safe when fewer than TOP_N entries exist
			// and avoids the linear-time indexed access of LinkedList.
			int written = 0;
			for (Map.Entry<String, Long> entry : list) {
				if (written >= TOP_N) {
					break;
				}
				outkey.set(entry.getKey());
				outval.set(entry.getValue());
				context.write(outkey,outval);
				written++;
			}
		}
	}
	
	/**
	 * Configures and runs the job.
	 *
	 * @param args {@code args[0]} is the input path, {@code args[1]} the output
	 *             path; an existing output path is deleted recursively first
	 * @return 0 when the job succeeds, -1 when it fails or the arguments are missing
	 * @throws Exception if job setup, the file system access or the job itself fails
	 * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
	 */
	@Override
	public int run(String[] args) throws Exception {
		if (args == null || args.length < 2) {
			System.err.println("Usage: MyMRRunProcess <input path> <output path>");
			return -1;
		}
		// Configuration injected by ToolRunner
		Configuration conf = this.getConf();
		Job job = Job.getInstance(conf, "MRTP");
		// Stage 1: input and output
		job.setJarByClass(MyMRRunProcess.class);
		Path in = new Path(args[0]);
		Path out = new Path(args[1]);
		// Resolve the file system from the path itself so a fully qualified
		// output URI on a non-default file system is handled as well.
		FileSystem fs = out.getFileSystem(conf);
		if(fs.exists(out)){
			fs.delete(out,true);
			System.out.println("The old path is deleted!");
		}
		job.setInputFormatClass(TextInputFormat.class);
		job.setOutputFormatClass(TextOutputFormat.class);
		FileInputFormat.addInputPath(job, in);
		FileOutputFormat.setOutputPath(job, out);
		// Stage 2: MapReduce parameters
		job.setMapperClass(MyMapper.class);
		job.setReducerClass(MyReduce.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);
		// The ranking is computed inside a single reducer's cleanup(); with more
		// than one reducer each would write its own partial top list.
		job.setNumReduceTasks(1);
		// Stage 3: execute and map the outcome to an exit code
		return job.waitForCompletion(true)?0:-1;
	}
	
	/**
	 * Program entry point: runs the job through {@code ToolRunner}, prints the
	 * outcome and exits with the job's status (non-zero on any failure).
	 *
	 * @param args command line arguments, forwarded to {@link #run(String[])}
	 */
	public static void main(String[] args) {		
		int result;
		try {
			result = ToolRunner.run(new MyMRRunProcess(), args);
		} catch (Exception e) {
			e.printStackTrace();
			// an exception must not end in exit status 0
			result = 1;
		}
		String msg = result == 0?"JOB OK!":"JOB FAIL!";
		System.out.println(msg);
		System.exit(result);
	}
}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值