// 本程序基于 Hadoop 2.7.4 开发，已验证可以运行。
// 天气数据请到 NCDC 或者《Hadoop权威指南》一书的配套网站获取。
public class MaxTemperature {
/**
 * Mapper that turns one fixed-width weather record into a (year, air temperature) pair.
 *
 * <p>Field positions read from each line: year at [15, 19), signed temperature at [87, 92),
 * quality code at index 92. A record is emitted only when the temperature is not the
 * {@link #MISSING} sentinel and the quality code is one of {@code 0, 1, 4, 5, 9}.
 *
 * <p>Records that are too short to contain the quality code, or whose temperature field is
 * not a valid integer, are skipped and counted instead of failing the whole task.
 */
public static class MaxTemperatureMapper extends Mapper<Object, Text, Text, IntWritable> {
    // A temperature of 9999 means the reading is missing.
    private static final int MISSING = 9999;
    // Shortest line that still contains the quality code at index 92.
    private static final int MIN_RECORD_LENGTH = 93;
    // Quality codes accepted by the filter (same set as the former regex "[01459]").
    private static final String ACCEPTED_QUALITY_CODES = "01459";
    // Counter group/name used to report skipped records.
    private static final String COUNTER_GROUP = "MaxTemperature";
    private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

    // Output objects are reused across calls to avoid one allocation pair per record.
    private final Text yearKey = new Text();
    private final IntWritable temperatureValue = new IntWritable();

    /**
     * Parses one input line and writes (year, temperature) when the reading is usable.
     *
     * @param key     input key supplied by the framework; not used
     * @param value   one line of the weather data file
     * @param context job context used to emit output and update counters
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        // Extract the String content of the Text object.
        String line = value.toString();
        if (line.length() < MIN_RECORD_LENGTH) {
            // Truncated or blank line: the fixed-width fields cannot be read safely.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        String year = line.substring(15, 19);

        int airTemperature;
        try {
            // Drop a leading '+' before parsing (older JDKs reject it); a leading '-' is kept.
            int start = line.charAt(87) == '+' ? 88 : 87;
            airTemperature = Integer.parseInt(line.substring(start, 92));
        } catch (NumberFormatException e) {
            // Corrupt temperature field: skip this record rather than abort the job.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        // Quality code of the reading.
        char quality = line.charAt(92);
        if (airTemperature != MISSING && ACCEPTED_QUALITY_CODES.indexOf(quality) >= 0) {
            yearKey.set(year);
            temperatureValue.set(airTemperature);
            context.write(yearKey, temperatureValue);
        }
    }
}
/**
 * Reducer that writes, for each key, the largest temperature among its values.
 */
public static class MaxTemperatureReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    /**
     * Scans all values for the key and emits the maximum.
     *
     * @param key     the grouping key produced by the mapper
     * @param values  all temperatures recorded for that key
     * @param context job context used to emit the result
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Start below every possible reading so the first value always wins.
        int highest = Integer.MIN_VALUE;
        for (IntWritable reading : values) {
            highest = Math.max(highest, reading.get());
        }
        context.write(key, new IntWritable(highest));
    }
}
/**
 * Configures and runs the max-temperature job.
 *
 * <p>The process exits with 0 when the job succeeds, 1 when it fails, and 2 when the
 * command line does not supply exactly an input path and an output path.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception{
    if (args.length != 2) {
        System.err.println("Usage: MaxTemperature <input path> <output path>");
        System.exit(2);
    }

    // Default Hadoop cluster configuration.
    Configuration conf = new Configuration();
    // Create a job on top of that configuration.
    Job job = Job.getInstance(conf, "max temperature");
    // Identify the jar by a class it contains instead of naming the jar explicitly.
    job.setJarByClass(MaxTemperature.class);
    // Mapper and reducer implementations.
    job.setMapperClass(MaxTemperatureMapper.class);
    job.setReducerClass(MaxTemperatureReducer.class);
    // Output key/value types (set once; the original repeated these two calls).
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Input path.
    FileInputFormat.addInputPath(job, new Path(args[0]));
    // Output path.
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // Submit and wait; waitForCompletion returns true on success, which must map to exit code 0.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}