// Hadoop MapReduce job: merges records that share a key and sums their numeric value fields.

package com.ali.godar;

import java.io.IOException;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;
import org.apache.hadoop.mapred.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class MergeData {

	public static class SpliterMapper extends Mapper<Object, Text, Text, Text> {

		private String[] keyIndex;
		private String[] valueIndex;
		private String seperator;
		private String[] metaDatas;

		private Text keyStrWord = new Text();
		private Text valueStrWord = new Text();

		private String getKey() {
			StringBuilder sb = new StringBuilder();
			for (String index : keyIndex) {
				sb.append(metaDatas[Integer.parseInt(index)].trim());
				sb.append(seperator);
			}
			if (sb.length() > 0) {
				sb.deleteCharAt(sb.length() - 1);
			}
			return sb.toString();
		}

		private String getValue() {
			StringBuilder sb = new StringBuilder();
			for (String index : valueIndex) {
				sb.append(metaDatas[Integer.parseInt(index)].trim());
				sb.append(seperator);
			}
			if (sb.length() > 0) {
				sb.deleteCharAt(sb.length() - 1);
			}
			return sb.toString();
		}

		private void configMaper(Context context, Text value) {
			Configuration conf = context.getConfiguration();

			// config seperator
			String sep = conf.get("maper.seperator", " ");
			this.seperator = sep.trim();

			String line = value.toString();
			metaDatas = line.split(seperator);

			String keyIndexStr = conf.get("maper.keys.index");
			keyIndex = keyIndexStr.split(",");

			String valueIndexStr = conf.get("maper.values.index");
			valueIndex = valueIndexStr.split(",");
		}

		public void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			this.configMaper(context, value);
			String keyStr = getKey();
			String valueStr = getValue();
			keyStrWord.set(keyStr);
			valueStrWord.set(valueStr);
			context.write(keyStrWord, valueStrWord);
		}
	}

	public static class MergeDataReducer extends
			Reducer<Text, Text, Text, Text> {

		private String seperator;
		private int valueFieldCounts;

		private Text result = new Text();

		private void configReducer(Context context) {
			Configuration conf = context.getConfiguration();
			// config seperator
			String sep = conf.get("reducer.seperator", " ");
			this.seperator = sep.trim();

			String maperValueKeyStr = conf.get("maper.values.index");
			valueFieldCounts = maperValueKeyStr.split(",").length;

		}

		public void reduce(Text key, Iterable<Text> values, Context context)
				throws IOException, InterruptedException {
			this.configReducer(context);
			double[] sum = new double[valueFieldCounts];

			for (Text val : values) {
				int i = 0;
				String str = val.toString();
				String[] data = str.split(seperator);
				for (String metaData : data) {
					sum[i++] += Double.parseDouble(metaData);
				}
			}

			StringBuilder sb = new StringBuilder();
			for (double data : sum) {
				sb.append(data);
				sb.append(seperator);
			}
			sb.deleteCharAt(sb.length() - 1);

			result.set(sb.toString());
			key.set(key.toString());
			context.write(key, result);
		}
	}

	public static void main(String[] args) throws Exception {


		
		Configuration conf = new Configuration();

		String[] otherArgs = new GenericOptionsParser(conf, args)
				.getRemainingArgs();

		// System.out.println("otherArgs:");
		// for (String string : otherArgs) {
		// System.out.println(string);
		// }

		if (otherArgs.length < 6) {
			System.err
					.println("Usage: MergeData <in> <out> <maper.seperator> <maper.keys.index> <maper.values.index>");
			System.exit(2);
		}

//		hadoop jar merge.jar com.ali.godar.MergeData input/ out11 , 0,2 1
//		hadoop jar merge.jar com.ali.godar.MergeData input/ out11 , 0,2 1
		
		// conf.set("maper.seperator", ",");
		// conf.set("maper.keys.index", "0");
		// conf.set("maper.values.index", "1");
		// conf.set("reducer.seperator", ",");

		conf.set("maper.seperator", otherArgs[3]);
		conf.set("maper.keys.index", otherArgs[4]);
		conf.set("maper.values.index", otherArgs[5]);
		conf.set("reducer.seperator", otherArgs[3]);

		Job job = new Job(conf, "merge data");
		job.setJarByClass(MergeData.class);
		job.setMapperClass(SpliterMapper.class);
		job.setCombinerClass(MergeDataReducer.class);
		job.setReducerClass(MergeDataReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

//		DBConfiguration.configureDB(job.get, "com.mysql.jdbc.Driver",
//				"jdbc:mysql://localhost:3306/school", "root", "root");

		FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
		FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}

}

// NOTE: trailing CSDN blog-page boilerplate (reads-more links, category tags,
// and a list of unrelated download links) removed — it was web-scrape residue
// after the closing brace, not part of this source file.