MapReduce之气象数据分析
问题描述
通过获取的气象数据来分析气温最大值
样例输入
数据集地址:天气数据集
样例输出
输出表示1901年的最高气温为239。NCDC数据集中的气温以0.1摄氏度为单位,因此239实际表示23.9℃
mapper阶段任务
map阶段主要是对文本进行切割,提取时间和温度,输出<时间,温度>键值对;经过shuffle分组后,reduce阶段收到的是<时间,<温度,温度,温度>>形式的数据
mapper阶段编码如下
public static class MaxTemperatureMapper extends Mapper<LongWritable,Text,Text,IntWritable>{
private static final int MISSING=9999;
public void map(LongWritable key,Text value,Context context) throws IOException,InterruptedException{
String line=value.toString();
String year=line.substring(15,19);
int airTemperature;
if(line.charAt(87)=='+'){
airTemperature=Integer.parseInt(line.substring(88,92));
}else{
airTemperature=Integer.parseInt(line.substring(87,92));
}
String quality=line.substring(92,93);
if(airTemperature!=MISSING&&quality.matches("[01459]")){
context.write(new Text(year),new IntWritable(airTemperature));
}
}
}
reducer阶段任务
reduce阶段的任务是遍历<时间,<温度,温度>>中的所有温度,并找出其中的最大值
reducer阶段编码如下
public static class MaxTemperatureReducer extends Reducer<Text,IntWritable,Text,IntWritable>{
public void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException,InterruptedException{
int maxValue=Integer.MIN_VALUE;
for (IntWritable value : values) {
maxValue = Math.max(maxValue, value.get());
}
context.write(key,new IntWritable(maxValue));
}
}
完整代码如下
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.lang.InterruptedException;
import java.util.Iterator;
public class Ncdc {
public static class MaxTemperatureMapper extends Mapper<LongWritable,Text,Text,IntWritable>{
private static final int MISSING=9999;
public void map(LongWritable key,Text value,Context context) throws IOException,InterruptedException{
String line=value.toString();
String year=line.substring(15,19); //获取时间,可以修改成月和日
int airTemperature; //获取温度
if(line.charAt(87)=='+'){
airTemperature=Integer.parseInt(line.substring(88,92));
}else{
airTemperature=Integer.parseInt(line.substring(87,92));
}
String quality=line.substring(92,93);
if(airTemperature!=MISSING&&quality.matches("[01459]")){
context.write(new Text(year),new IntWritable(airTemperature));
}
}
}
public static class MaxTemperatureReducer extends Reducer<Text,IntWritable,Text,IntWritable>{
public void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException,InterruptedException{
int maxValue=Integer.MIN_VALUE; //遍历查找最大值
for (IntWritable value : values) {
maxValue = Math.max(maxValue, value.get());
}
context.write(key,new IntWritable(maxValue));
}
}
public static void main(String[] args) throws Exception{
Configuration conf=new Configuration();
String[] otherArgs=new String[]{"input/file.txt","output"};
if(otherArgs.length!=2){
System.err.println("参数错误");
System.exit(2);
}
Job job= new Job(conf,"Ncdsc");
FileInputFormat.addInputPath(job,new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job,new Path(otherArgs[1]));
job.setJarByClass(Ncdc.class);
job.setMapperClass(MaxTemperatureMapper.class);
job.setReducerClass(MaxTemperatureReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
System.exit(job.waitForCompletion(true)?0:1);
}
}
写在最后
这段程序既可以以年为单位查找最高气温,也可以以月、日为单位;有兴趣的读者可以自行修改map阶段中提取时间的相关代码