package com.smartmap.example;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
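
/**
 * Computes the average number of claims per patent, grouped by country.
 * Assumes the input is a comma-separated patent file laid out like the NBER
 * apat63_99 data, where fields[4] is the country code and fields[8] is the
 * claim count; adjust the indexes if your data differs.
 */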
public class AverageByAttribute
{
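    // Mapper: for each record with a usable claim count, emit
    // (country, "claims,1") so later stages can total both parts.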
    public static class MapClass extends Mapper<LongWritable, Text, Text, Text>
    {
        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
        {
            // Split with limit -1 so trailing empty fields are preserved
            // and the column indexes stay aligned.
            String[] fields = value.toString().split(",", -1);
            String country = fields[4];
            String numClaims = fields[8];
            // Skip empty claim counts and the quoted header row.
            if (numClaims.length() > 0 && !numClaims.startsWith("\"")) {
                context.write(new Text(country), new Text(numClaims + ",1"));
            }
        }
    }
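
    // Reducer: totals the partial sums and counts, then writes the
    // per-country average as a DoubleWritable.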
    public static class ReducerClass extends Reducer<Text, Text, Text, DoubleWritable>
    {
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException
        {
            double sum = 0;
            int count = 0;
            for (Text val : values)
            {
                String[] fields = val.toString().split(",");
                sum += Double.parseDouble(fields[0]);
                count += Integer.parseInt(fields[1]);
            }
            context.write(key, new DoubleWritable(sum / count));
        }
    }
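
    // Combiner: pre-aggregates map output locally. It must emit the same
    // Text "sum,count" format the mapper produces, never the average itself,
    // because an average of averages over unequal counts is wrong.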
    public static class CombineClass extends Reducer<Text, Text, Text, Text>
    {
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException
        {
            double sum = 0;
            int count = 0;
            for (Text val : values)
            {
                String[] fields = val.toString().split(",");
                sum += Double.parseDouble(fields[0]);
                count += Integer.parseInt(fields[1]);
            }
            context.write(key, new Text(sum + "," + count));
        }
    }
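
    // Driver: parses generic options, wires the classes together, and
    // submits the job.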
    public static void main(String[] args) throws Exception
    {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2)
        {
            System.err.println("Usage: AverageByAttribute <input path> <output path>");
            System.exit(2);
        }
        //conf.set("key.value.separator.in.input.line", ",");
        Job job = Job.getInstance(conf, "AverageByAttribute");
        job.setJarByClass(AverageByAttribute.class);
        job.setMapperClass(MapClass.class);
        job.setCombinerClass(CombineClass.class);
        job.setReducerClass(ReducerClass.class);
        // Map and combiner output is Text, while the final reducer output
        // value is DoubleWritable, so declare the two type pairs separately.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
----------------------------------------------------------------------------------------------------------
Example program arguments (input path on HDFS, then the job output path):
hdfs://192.168.1.30:9000/data/apat /opt/eclipse/workspace/data/out
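
Equivalently, a sketch of a command-line run, assuming the class is packaged
into a jar (the name average.jar is hypothetical):

hadoop jar average.jar com.smartmap.example.AverageByAttribute \
    hdfs://192.168.1.30:9000/data/apat /opt/eclipse/workspace/data/out

The output directory must not already exist; FileOutputFormat rejects the job
at submission time if it does.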