MapReduce Case 10: Dependent Computation Across Multiple Data Files

Problem:

Description: for every number, compute the running sum accumulated up to that number's position across all input files, taken in file-name order.
   
   For example,
   
0001.txt contains the data:
   1
   2
   3
   4
   5
   .....

0002.txt contains the data:
   10
   10
   10
   10
   10
   
The expected result is:
   1	1
   2	3
   3	6
   4	10
   5	15
   .....

   10	25
   10	35
   10	45
   10	55
   10	65
   
   That is, every number is followed by the cumulative total of all numbers up to and including it, counted across the files in order.
   
   Overall approach:
   1. First compute the total of each file.
   2. Then stack the totals in file-name order.
   
   Difficulty: how can the several data blocks of one file be accumulated in order? (In this example every input file is small enough to form a single split, so one map task reads it sequentially; a file spanning multiple blocks would need extra ordering logic. A local sketch of the two-pass idea follows.)
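
Before the MapReduce implementation, here is a minimal local (non-MapReduce) Java sketch of the two-pass idea. The class name and hard-coded data are illustrative only, not part of the original solution:

import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;

public class TwoPassSketch {
	public static void main(String[] args) {
		// Hypothetical input: file name -> its numbers, iterated in file-name order.
		Map<String, long[]> files = new TreeMap<>();
		files.put("0001.txt", new long[]{1, 2, 3, 4, 5});
		files.put("0002.txt", new long[]{10, 10, 10, 10, 10});

		// Pass 1: for every file, record the total of all files that precede it.
		Map<String, Long> prefixBefore = new HashMap<>();
		long total = 0;
		for (Map.Entry<String, long[]> e : files.entrySet()) {
			prefixBefore.put(e.getKey(), total);
			for (long n : e.getValue()) total += n;
		}

		// Pass 2: seed each file's running total with its prefix, then emit
		// "number <TAB> runningTotal" for every number.
		for (Map.Entry<String, long[]> e : files.entrySet()) {
			long running = prefixBefore.get(e.getKey());
			for (long n : e.getValue()) {
				running += n;
				System.out.println(n + "\t" + running);
			}
		}
	}
}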

Approach: use the map-join idea and finish in two steps. The first job produces, for every file, a line holding the file name, the sum of that file's numbers, and the sum of all files that precede it.

The result takes the form shown below (for example, the starting offset of 0003.txt is 55 + 550 = 605). This file is then loaded into memory as a lookup table by the second job; by matching on the file name, each map task obtains the starting offset for its file's running total and simply keeps adding.

0001.txt	55	0
0002.txt	550	55
0003.txt	10	605
0005.txt	200	615
0006.txt	50	815
The implementation code is as follows:
/**
 * @author: lpj
 * @date: 2018-03-16 19:16:47
 * @Description: two chained MapReduce jobs that append a cross-file running total to every number
 */
package lpj.reduceWork;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SumFielsNumMR {
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		conf.set("fs.defaultFS", "hdfs://hadoop02:9000");
		System.setProperty("HADOOP_USER_NAME", "hadoop"); // run against the cluster as user hadoop
		//-----------------------------
		FileSystem fs = FileSystem.get(conf); // HDFS client for the fs.defaultFS set above
		Job job = Job.getInstance(conf);
		job.setJarByClass(SumFielsNumMR.class);
		job.setMapperClass(SumFielsNumMR_Mapper.class);
		job.setReducerClass(SumFielsNumMR_Reducer.class);
		
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		
		Path inputPath = new Path("/a/homework10/input");
		Path outputPath = new Path("/a/homework10/output");
		if (fs.exists(outputPath)) {
			fs.delete(outputPath, true);
		}
		FileInputFormat.setInputPaths(job, inputPath);
		FileOutputFormat.setOutputPath(job, outputPath);
		//---------------------------------------------
		FileSystem fs2 = FileSystem.get(conf);
		Job job2 = Job.getInstance(conf);
		job2.setJarByClass(SumFielsNumMR.class);
		job2.setMapperClass(SumFielsNumMR2_Mapper.class);
		job2.setNumReduceTasks(0); // map-only: one output file per input file
		job2.setMapOutputKeyClass(Text.class);
		job2.setMapOutputValueClass(NullWritable.class);
		URI uri = new URI("/a/homework10/output/part-r-00000"); // lookup table written by job 1
		job2.addCacheFile(uri);
		job2.setOutputKeyClass(Text.class);
		job2.setOutputValueClass(Text.class);
		Path inputPath2 = new Path("/a/homework10/input"); // same multi-file input as job 1
		Path outputPath2 = new Path("/a/homework10/output2");
		if (fs2.exists(outputPath2)) {
			fs2.delete(outputPath2, true);
		}
		FileInputFormat.setInputPaths(job2, inputPath2);
		FileOutputFormat.setOutputPath(job2, outputPath2);
		//--------------------------------------
		ControlledJob aJob = new ControlledJob(job.getConfiguration());
		ControlledJob bJob = new ControlledJob(job2.getConfiguration());
		aJob.setJob(job);
		bJob.setJob(job2);
		JobControl jc = new JobControl("jc");
		jc.addJob(aJob);
		jc.addJob(bJob);
		bJob.addDependingJob(aJob); // job2 may start only after job 1 completes successfully
		Thread thread = new Thread(jc);
		thread.start();
		while (!jc.allFinished()) {
			Thread.sleep(1000); // poll until both jobs have finished
		}
		jc.stop();
	}
	//-------- Job 1: compute each file's sum and the sum of all preceding files --------
	public static class SumFielsNumMR_Mapper extends Mapper<LongWritable, Text, Text, Text>{
		Text kout = new Text();
		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			// Key each number by the name of the file it came from.
			InputSplit inputSplit = context.getInputSplit();
			FileSplit fileSplit = (FileSplit) inputSplit;
			String kk = fileSplit.getPath().getName();
			kout.set(kk);
			context.write(kout, value);
		}
	}
	public static class SumFielsNumMR_Reducer extends Reducer<Text, Text, Text, Text>{
		long sumfile = 0; // running total of all files already reduced
		@Override
		protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
			// Keys (file names) arrive sorted, and with the default single reducer
			// every file passes through this one instance, so sumfile accumulates
			// the prefix sum in file-name order.
			long sum = 0;
			for (Text text : values) {
				sum += Long.parseLong(text.toString());
			}
			// Emit: filename <TAB> file sum <TAB> sum of all preceding files.
			context.write(key, new Text(sum + "\t" + sumfile));
			sumfile += sum;
		}
	}
	//-------- Job 2: load the file table into memory, then append running totals --------
	public static class SumFielsNumMR2_Mapper extends Mapper<LongWritable, Text, Text, NullWritable>{
		Text kout = new Text();
		Map<String, Long> fileSumMap = new HashMap<>();
		long runningSum = 0; // prefix sum of earlier files, plus numbers already seen in this file
		@SuppressWarnings("deprecation")
		@Override
		protected void setup(Context context) throws IOException, InterruptedException {
			// Parse job 1's output: filename <TAB> file sum <TAB> prefix sum.
			Path[] paths = context.getLocalCacheFiles();
			String str = paths[0].toUri().toString();
			BufferedReader bf = new BufferedReader(new FileReader(new File(str)));
			String readline = null;
			while ((readline = bf.readLine()) != null) {
				String[] split = readline.split("\t");
				fileSumMap.put(split[0], Long.parseLong(split[2]));
			}
			IOUtils.closeStream(bf);
			// Seed the running total with the prefix sum of the file this map task reads.
			// (Assumes each input file is a single split, i.e. one mapper per file.)
			FileSplit fileSplit = (FileSplit) context.getInputSplit();
			String fname = fileSplit.getPath().getName();
			if (fileSumMap.containsKey(fname)) {
				runningSum = fileSumMap.get(fname);
			}
		}

		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			long num = Long.parseLong(value.toString());
			runningSum += num;
			kout.set(num + "\t" + runningSum);
			context.write(kout, NullWritable.get());
		}
	}
	}
	// Unused: job2 is map-only (numReduceTasks = 0). See the sketch after the
	// sample results for a reducer that merges the output into one file.
	public static class SumFielsNumMR2_Reducer extends Reducer<Text, Text, Text, Text>{
		@Override
		protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
		}
	}
}
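
The two jobs are chained through JobControl, so a single submission runs them in order. A typical invocation might look like the following (the jar file name is hypothetical):

hadoop jar sumfiles.jar lpj.reduceWork.SumFielsNumMR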

Run results: the second job is map-only, so each input file yields its own output file under /a/homework10/output2; to merge everything into a single file, a reduce step must be added (a sketch follows the sample output). The sample below is the per-file output for the file containing 10, 20, ..., 100 (0002.txt in the table above, whose starting offset is 55):

10	65
20	85
30	115
40	155
50	205
60	265
70	335
80	415
90	505
100	605
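
As noted above, merging the per-file outputs into one file requires a reduce step. Below is a minimal sketch of such a pass-through reducer; it is an illustration, not code from the original post, and it assumes job2.setNumReduceTasks(1) with the output value class changed to NullWritable. One caveat: since the keys are plain Text, a single reducer emits them in lexicographic order, so keeping the original numeric order would additionally require a composite, numerically sortable key.

public static class MergeAllReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
	@Override
	protected void reduce(Text key, Iterable<NullWritable> values, Context context)
			throws IOException, InterruptedException {
		// Write each "number <TAB> runningTotal" line once per occurrence.
		for (NullWritable v : values) {
			context.write(key, NullWritable.get());
		}
	}
}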

