2019-9-4 [MapReduce] 使用 combiner 在 map 与 reduce 之间先做一次本地聚合

//combiner上接map的输出,下接reduce的输入
	/**
	 * Combiner that sums the {@link IntWritable} counts it receives for a key
	 * and emits a single (key, sum) pair.
	 *
	 * <p>Input and output types are both (Text, IntWritable), so the emitted
	 * records have the same shape as the records it consumes.
	 */
	private static class MyCombiner extends Reducer<Text, IntWritable, Text, IntWritable>{

		// Reused output holder so that no new Writable is allocated per key.
		private final IntWritable outval = new IntWritable();

		/**
		 * Adds up all values of one key and writes the partial sum.
		 *
		 * @param key     the word being counted
		 * @param values  the counts collected for {@code key}
		 * @param context Hadoop context used to emit the (key, sum) pair
		 * @throws IOException          if writing to the context fails
		 * @throws InterruptedException if the task is interrupted while writing
		 */
		@Override
		protected void reduce(Text key, Iterable<IntWritable> values,
				Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
			// Primitive local accumulator: starts at zero for every key and
			// avoids boxing/unboxing an Integer on each addition.
			int sum = 0;
			for (IntWritable value : values) {
				sum += value.get();
			}
			outval.set(sum);
			context.write(key, outval);
		}
	}

完整代码:

/**
 * MyWordCount4.java
 * com.hnxy.mr
 * Copyright (c) 2019, Zimo. All rights reserved.
 *
 * <p>Word-count MapReduce job that registers a combiner so that mapper output
 * is pre-aggregated before it reaches the reducer.
 *
 * <p>Usage: {@code MyWordCount4 <input path> <output path>}. An existing
 * output path is deleted recursively before the job is submitted.
 *
 * <p>Date: 2019-09-04
 *
 * @author Zimo
 */
public class MyWordCount4 extends Configured implements Tool{

	/**
	 * Mapper: decodes each input line as GBK, splits it on tab characters and
	 * emits (token, 1) for every token. Also maintains line counters in the
	 * "数据行信息" counter group (total / valid / invalid lines).
	 */
	private static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

		// Reused output objects; the value is always the constant 1.
		private final Text outkey = new Text();
		private final IntWritable outval = new IntWritable(1);

		/**
		 * Processes one input line.
		 *
		 * @param key     key supplied by the input format (not used here)
		 * @param value   raw line content
		 * @param context Hadoop context used for counters and output
		 * @throws IOException          if decoding or writing fails
		 * @throws InterruptedException if the task is interrupted while writing
		 */
		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
				throws IOException, InterruptedException {
			// Character-set conversion: decode only the first getLength() bytes
			// of the buffer returned by getBytes(), interpreting them as GBK.
			String line = new String(value.getBytes(), 0, value.getLength(), "GBK");
			String[] fields = line.split("\t");

			// Count every line seen.
			context.getCounter("数据行信息", "总行数").increment(1);

			// A line is valid when splitting yields at least one token and it is
			// not just the single empty token produced by an empty line. (An
			// all-tab line yields zero tokens because split drops trailing
			// empty strings.)
			boolean hasContent = fields.length > 1
					|| (fields.length == 1 && !fields[0].isEmpty());

			if (hasContent) {
				context.getCounter("数据行信息", "有效行数").increment(1);
				for (String field : fields) {
					outkey.set(field);
					context.write(outkey, outval);
				}
			} else {
				context.getCounter("数据行信息", "无效行数").increment(1);
			}
		}
	}

	/**
	 * Combiner: sits between the map output and the reduce input. Sums the
	 * counts of one key and emits a single (key, partial sum) pair with the
	 * same (Text, IntWritable) types as the map output.
	 */
	private static class MyCombiner extends Reducer<Text, IntWritable, Text, IntWritable>{

		// Reused output holder so that no new Writable is allocated per key.
		private final IntWritable outval = new IntWritable();

		/**
		 * Adds up all values of one key and writes the partial sum.
		 *
		 * @param key     the word being counted
		 * @param values  the counts collected for {@code key}
		 * @param context Hadoop context used to emit the (key, sum) pair
		 * @throws IOException          if writing to the context fails
		 * @throws InterruptedException if the task is interrupted while writing
		 */
		@Override
		protected void reduce(Text key, Iterable<IntWritable> values,
				Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
			// Primitive local accumulator: starts at zero for every key and
			// avoids boxing/unboxing an Integer on each addition.
			int sum = 0;
			for (IntWritable value : values) {
				sum += value.get();
			}
			outval.set(sum);
			context.write(key, outval);
		}
	}

	/**
	 * Reducer: sums the (possibly pre-combined) counts of one key into a long
	 * and writes the final (word, total) pair.
	 */
	private static class MyReducer extends Reducer<Text, IntWritable, Text, LongWritable> {

		// Reused output holder for the final total.
		private final LongWritable outval = new LongWritable();

		/**
		 * Adds up all values of one key and writes the total.
		 *
		 * @param key     the word being counted
		 * @param values  the counts received for {@code key}
		 * @param context Hadoop context used to emit the (key, total) pair
		 * @throws IOException          if writing to the context fails
		 * @throws InterruptedException if the task is interrupted while writing
		 */
		@Override
		protected void reduce(Text key, Iterable<IntWritable> values,
				Reducer<Text, IntWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {
			// Values are consumed in one streaming pass and read via get()
			// immediately; they are deliberately not collected into a list,
			// because the framework reuses the value object between iterations.
			long total = 0L;
			for (IntWritable value : values) {
				total += value.get();
			}
			outval.set(total);
			context.write(key, outval);
		}
	}

	/**
	 * Configures and runs the job.
	 *
	 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
	 * @return 0 when the job succeeds, -1 when it fails or the arguments are missing
	 * @throws Exception if job setup or submission fails
	 */
	@Override
	public int run(String[] args) throws Exception {
		// Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
		if (args == null || args.length < 2) {
			System.err.println("Usage: MyWordCount4 <input path> <output path>");
			return -1;
		}

		// Configuration injected through Configured / ToolRunner.
		Configuration conf = this.getConf();
		Job job = Job.getInstance(conf, "wc");

		// Input and output locations of this job.
		Path in = new Path(args[0]);
		Path out = new Path(args[1]);

		// Remove a pre-existing output directory (like "hadoop fs -rm -r").
		// The file system is resolved from the path itself so that an output
		// URI on a non-default file system is handled as well.
		FileSystem fs = out.getFileSystem(conf);
		if (fs.exists(out)) {
			fs.delete(out, true);
			System.out.println("The Old Path is deleted!");
		}

		// Class used to locate the job jar.
		job.setJarByClass(MyWordCount4.class);

		// Input / output formats.
		job.setInputFormatClass(TextInputFormat.class);
		job.setOutputFormatClass(TextOutputFormat.class);

		// Map, combine and reduce implementations.
		job.setMapperClass(MyMapper.class);
		job.setCombinerClass(MyCombiner.class);
		job.setReducerClass(MyReducer.class);

		// The map output value type (IntWritable) differs from the final
		// output value type (LongWritable), so both are declared explicitly.
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);

		// Input and output paths.
		FileInputFormat.addInputPath(job, in);
		FileOutputFormat.setOutputPath(job, out);

		// Submit and wait; map the boolean outcome to the documented 0 / -1.
		return job.waitForCompletion(true) ? 0 : -1;
	}

	/**
	 * Command-line entry point. Runs the job through {@link ToolRunner},
	 * prints the outcome and exits with the job's result code so that calling
	 * scripts can detect a failure.
	 *
	 * @param args command-line arguments forwarded to {@link #run(String[])}
	 */
	public static void main(String[] args) {
		int result;
		try {
			result = ToolRunner.run(new MyWordCount4(), args);
		} catch (Exception e) {
			e.printStackTrace();
			// An exception is a failure too; report it through the exit code.
			result = -1;
		}
		String msg = result == 0 ? "JOB OK!" : "JOB FAIL!";
		System.out.println(msg);
		System.exit(result);
	}
}

在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值