MapReduce实现求各个省份的上下行流量

需求:实现两个文件的拼接(join):以两个文件中相同的字段作为key,将字典文件的整行数据作为value缓存起来,再与日志文件的每条记录进行关联,拼接后的结果用新建的JavaBean封装输出。

package com.liuliang;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;




/**
 * Map-side join of an HTTP traffic log with a phone-number-segment dictionary.
 *
 * <p>Each map task loads the dictionary file into memory once (in {@code setup}), keyed by the
 * second whitespace-separated column of every dictionary line. Each log record is then matched
 * on the first seven characters of its first column and emitted as an {@code HPBean}.
 *
 * <p>No custom reducer is configured; the job relies on Hadoop's default reducer.
 */
public class MR {
	/** Traffic log used as the job input. */
	private static final String INPUT_PATH = "d:/data/http.log";
	/** Job output directory; removed before every run. */
	private static final String OUTPUT_PATH = "d:/out/http/";

	/**
	 * Mapper that joins each log record with the dictionary line sharing its 7-character prefix.
	 *
	 * <p>Sample dictionary line (from the original author's notes):
	 * {@code 130	1300000	山东	济南	联通	250000	0531	370100}<br>
	 * Sample log line: {@code 13826544101	http://www.weibo.com/?category=7 20 5000}
	 */
	public static class MapTask extends Mapper<LongWritable, Text, HPBean, NullWritable>{
		/** Dictionary file loaded into memory by {@link #setup}. */
		private static final String PHONE_DICT_PATH = "d:/data/phone.txt";
		/** Number of leading phone-number characters used as the join key. */
		private static final int PREFIX_LENGTH = 7;
		/** Counter group for records that could not be joined. */
		private static final String COUNTER_GROUP = "MR";

		/** Join key (dictionary column 1) mapped to the full dictionary line. */
		Map<String,String> map = new HashMap<>();
		/** Output key instance reused across {@code map} calls. */
		HPBean bean = new HPBean();

		/**
		 * Loads the dictionary file into {@link #map}.
		 *
		 * <p>Lines with fewer than five columns are skipped because {@code map} later reads
		 * columns 2..4 of the stored line.
		 *
		 * @param context task context; its configuration is used to obtain the file system
		 * @throws IOException if the dictionary cannot be opened or read
		 */
		@Override
		protected void setup(Context context) throws IOException, InterruptedException {
			// Use the job's configuration rather than a fresh one so job-level settings apply.
			// The FileSystem itself is not closed here: FileSystem.get may hand out a shared instance.
			FileSystem fs = FileSystem.get(context.getConfiguration());
			// NOTE(review): assumes phone.txt is UTF-8 encoded (previously decoded with the
			// platform default charset) - confirm against the actual data file.
			try (FSDataInputStream inputStream = fs.open(new Path(PHONE_DICT_PATH));
					BufferedReader br = new BufferedReader(
							new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
				String line;
				while ((line = br.readLine()) != null) {
					String[] split = line.split("\\s");
					if (split.length < 5) {
						// Blank or truncated dictionary line: nothing usable to join against.
						continue;
					}
					map.put(split[1], line);
				}
			}
		}

		/**
		 * Joins one log record with its dictionary entry and writes the resulting bean.
		 *
		 * <p>Records that are too short, have non-numeric flow columns, or whose prefix is not in
		 * the dictionary are skipped and tallied in job counters instead of failing the task.
		 *
		 * @param key byte offset of the line (unused)
		 * @param value one line of the traffic log
		 * @param context task context used to emit the bean and update counters
		 */
		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			String[] strings = value.toString().split("\\s");
			if (strings.length < 4 || strings[0].length() < PREFIX_LENGTH) {
				context.getCounter(COUNTER_GROUP, "MALFORMED_RECORD").increment(1);
				return;
			}

			String seven = strings[0].substring(0, PREFIX_LENGTH);
			String dictLine = map.get(seven);
			if (dictLine == null) {
				// No dictionary entry for this prefix; previously this caused a NullPointerException.
				context.getCounter(COUNTER_GROUP, "UNMATCHED_PREFIX").increment(1);
				return;
			}
			String[] split = dictLine.split("\\s");

			long prefix;
			long third;
			long fourth;
			try {
				prefix = Long.parseLong(seven);
				third = Long.parseLong(strings[2]);
				fourth = Long.parseLong(strings[3]);
			} catch (NumberFormatException e) {
				// Non-numeric prefix or flow column (e.g. a header line): count it and move on.
				context.getCounter(COUNTER_GROUP, "MALFORMED_RECORD").increment(1);
				return;
			}

			// HPBean is declared elsewhere; presumably the arguments are
			// (phone, upFlow, downFlow, province, city, isp) - verify against HPBean.set.
			bean.set(prefix, third, fourth, split[2], split[3], split[4]);
			context.write(bean, NullWritable.get());
		}
	}

	/**
	 * Configures and runs the join job, then prints whether it succeeded.
	 *
	 * @param args command-line arguments (unused)
	 * @throws Exception if job setup or execution fails
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);

		job.setJarByClass(MR.class);
		job.setMapperClass(MapTask.class);

		job.setOutputKeyClass(HPBean.class);
		job.setOutputValueClass(NullWritable.class);

		FileInputFormat.addInputPath(job, new Path(INPUT_PATH));
		FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));

		// Remove output left over from a previous run before submitting the job.
		File file = new File(OUTPUT_PATH);
		if (file.exists()) {
			FileUtils.deleteDirectory(file);
		}

		boolean completion = job.waitForCompletion(true);
		System.out.println(completion?"执行成功":"执行失败");
	}

}

注意事项:
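1.要分清楚是把哪个文件拼接到另一个文件上:被拼接的字典文件(phone.txt)在setup中加载进内存,另一个文件(http.log)作为map的输入逐行处理
2.在新建的JavaBean中,字段的名称和顺序要与set方法的参数一一对应
3.两个文件没有可以直接匹配的相同字段时,要通过变换构造出来(例如截取手机号的前7位作为号段,再去字典中查找)
4.将读取字典文件的操作放在setup中,在map阶段就完成拼接,这样可以把reduce省下来做更多的事情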

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值