package com.ali.godar;
import java.io.IOException;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;
import org.apache.hadoop.mapred.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class MergeData {
public static class SpliterMapper extends Mapper<Object, Text, Text, Text> {
private String[] keyIndex;
private String[] valueIndex;
private String seperator;
private String[] metaDatas;
private Text keyStrWord = new Text();
private Text valueStrWord = new Text();
private String getKey() {
StringBuilder sb = new StringBuilder();
for (String index : keyIndex) {
sb.append(metaDatas[Integer.parseInt(index)].trim());
sb.append(seperator);
}
if (sb.length() > 0) {
sb.deleteCharAt(sb.length() - 1);
}
return sb.toString();
}
private String getValue() {
StringBuilder sb = new StringBuilder();
for (String index : valueIndex) {
sb.append(metaDatas[Integer.parseInt(index)].trim());
sb.append(seperator);
}
if (sb.length() > 0) {
sb.deleteCharAt(sb.length() - 1);
}
return sb.toString();
}
private void configMaper(Context context, Text value) {
Configuration conf = context.getConfiguration();
// config seperator
String sep = conf.get("maper.seperator", " ");
this.seperator = sep.trim();
String line = value.toString();
metaDatas = line.split(seperator);
String keyIndexStr = conf.get("maper.keys.index");
keyIndex = keyIndexStr.split(",");
String valueIndexStr = conf.get("maper.values.index");
valueIndex = valueIndexStr.split(",");
}
public void map(Object key, Text value, Context context)
throws IOException, InterruptedException {
this.configMaper(context, value);
String keyStr = getKey();
String valueStr = getValue();
keyStrWord.set(keyStr);
valueStrWord.set(valueStr);
context.write(keyStrWord, valueStrWord);
}
}
public static class MergeDataReducer extends
Reducer<Text, Text, Text, Text> {
private String seperator;
private int valueFieldCounts;
private Text result = new Text();
private void configReducer(Context context) {
Configuration conf = context.getConfiguration();
// config seperator
String sep = conf.get("reducer.seperator", " ");
this.seperator = sep.trim();
String maperValueKeyStr = conf.get("maper.values.index");
valueFieldCounts = maperValueKeyStr.split(",").length;
}
public void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
this.configReducer(context);
double[] sum = new double[valueFieldCounts];
for (Text val : values) {
int i = 0;
String str = val.toString();
String[] data = str.split(seperator);
for (String metaData : data) {
sum[i++] += Double.parseDouble(metaData);
}
}
StringBuilder sb = new StringBuilder();
for (double data : sum) {
sb.append(data);
sb.append(seperator);
}
sb.deleteCharAt(sb.length() - 1);
result.set(sb.toString());
key.set(key.toString());
context.write(key, result);
}
}
/**
 * Job driver. Expects six remaining arguments after generic-option parsing;
 * index 0 is ignored (the historical invocation passed the class name
 * there), indices 1-5 are: input path, output path, separator, key field
 * indices, value field indices.
 *
 * Example: hadoop jar merge.jar com.ali.godar.MergeData input/ out11 , 0,2 1
 */
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	String[] otherArgs = new GenericOptionsParser(conf, args)
			.getRemainingArgs();
	if (otherArgs.length < 6) {
		// Fixed: the old usage string listed only five parameters while the
		// length check above requires six (otherArgs[0] is skipped).
		System.err
				.println("Usage: MergeData <ignored> <in> <out> <seperator> <keys.index> <values.index>");
		System.exit(2);
	}
	conf.set("maper.seperator", otherArgs[3]);
	conf.set("maper.keys.index", otherArgs[4]);
	conf.set("maper.values.index", otherArgs[5]);
	// The reducer reuses the mapper's separator.
	conf.set("reducer.seperator", otherArgs[3]);
	// NOTE(review): new Job(conf, name) is deprecated in Hadoop 2.x in favor
	// of Job.getInstance(conf, name); kept for compatibility with the old
	// mapred.lib.db API this file still imports.
	Job job = new Job(conf, "merge data");
	job.setJarByClass(MergeData.class);
	job.setMapperClass(SpliterMapper.class);
	// Reducer doubles as combiner: position-wise sums are associative.
	job.setCombinerClass(MergeDataReducer.class);
	job.setReducerClass(MergeDataReducer.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(Text.class);
	FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
	FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
	System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
// Tag: Hadoop
// (Blog-scrape residue, kept as a comment so the file compiles; original
// footer read "最新推荐文章于 2023-07-26 00:52:14 发布" — "latest recommended
// article published 2023-07-26 00:52:14".)