MapReduce编程案例——气象分析
描述:
·通过采集的气象数据分析每年的最高温度
·初始数据:
0067011990999991950051507004888888889999999N9+00001+9999999999999999999999
0067011990999991950051512004888888889999999N9+00221+9999999999999999999999
0067011990999991950051518004888888889999999N9-00111+9999999999999999999999
0067011990999991949032412004888888889999999N9+01111+9999999999999999999999
0067011990999991950032418004888888880500001N9+00001+9999999999999999999999
0067011990999991950051507004888888880500001N9+00781+9999999999999999999999
数据说明:
第16-19个字符是year(对应代码中0起始下标的substring(15,19))
第46-50位是温度表示(代码中substring(45,50)),+表示零上,-表示零下,且温度的数值部分不能是9999,9999表示异常数据
第51位(代码中substring(50,51))是质量码,其值只能是0、1、4、5、9几个数字
·结果:
1949 111
1950 78
·分析:
Map函数只是一个数据准备阶段,通过这种方式来准备数据,使reduce函数能够继续对它进行处理,即找出最大温度。
MAP部分:数据准备阶段取year 和 温度 作为k2和v2
REDUCE部分:取最大温度
代码说明:
package org.apache.hadoop.mapreduce; import java.io.IOException; import java.net.URI; import java.util.Iterator; import java.util.StringTokenizer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser; public class WeatherMain { final static String INPUT_PATH="hdfs://localhost:9000/input"; final static String OUTPUT_PATH="hdfs://localhost:9000/output"; public static void main(String[] args) throws Exception { // TODO Auto-generated method stub Configuration configuration = new Configuration(); FileSystem fileSystem=FileSystem.get(new URI(INPUT_PATH),configuration); if (fileSystem.exists(new Path(OUTPUT_PATH))) { fileSystem.delete(new Path(OUTPUT_PATH),true); } Job job = new Job(configuration,"WeatherMax"); FileInputFormat.setInputPaths(job, INPUT_PATH); FileOutputFormat.setOutputPath(job,new Path(OUTPUT_PATH)); job.setJarByClass(WeatherMain.class); job.setMapperClass(weathermapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(weatherreducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.waitForCompletion(true); } public static class weatherreducer extends Reducer<Text, IntWritable, Text, IntWritable> { private IntWritable result = new IntWritable(); public weatherreducer() { } protected void reduce(Text key2, Iterable<IntWritable> value2, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException { Text key3=key2; int maxValue=0; 
for(IntWritable intWritable :value2) { maxValue = Math.max(maxValue, intWritable.get()); } IntWritable value4 =new IntWritable(maxValue); context.write(key3, value4); } } public static class weathermapper extends Mapper<LongWritable, Text, Text, IntWritable> { private static final int FAIL_DATA=9999; public void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException { String line =value.toString(); String year =line.substring(15,19); String tmp =line.substring(45,46); int val=0; if(tmp=="+") { val=Integer.parseInt(line.substring(46,50)); } else { val=Integer.parseInt(line.substring(45,50)); } if(Integer.parseInt(line.substring(46,50))!=FAIL_DATA &&line.substring(50,51).matches("[01459]")) { context.write(new Text(year), new IntWritable(val)); } } } }