Hadoop 权威指南(第3版) 下载:https://download.csdn.net/download/henry_lin_wind/11036890
气象数据集的编码格式:按行并以ASCII格式存储,其中每一行是一条记录
1-4 0169
5-10 501360 # USAF weather station identifier
11-15 99999 # WBAN weather station identifier
16-23 20170101 # 记录日期
24-27 0000 # 记录时间
28 4
29-34 +52130 # 纬度(1000倍)
35-41 +122520 # 经度(1000倍)
42-46 FM-12
47-51 +0433 # 海拔(米)
52-56 99999
57-60 V020
61-63 220 # 风向
64 1 # 质量代码
65 N
66-69 0010
70 1
71-75 02600 # 云高(米)
76 1
77 9
78 9
79-84 003700 # 能见距离(米)
85 1 # 质量代码
86 9
87 9
88-92 -0327 # 空气温度(摄氏度*10)
93 1
94-98 -0363 # 露点温度(摄氏度*10)
99 1 # 质量代码
100-104 10264 # 大气压力
105 1 # 质量代码
某地区2002年的气象数据:https://download.csdn.net/download/henry_lin_wind/11064461
本地模式的安装和部署可以参考我的博客 :https://blog.csdn.net/Henry_Lin_Wind/article/details/88812421
1、创建Map/Reduce Project
新建input文件夹,用来存放气象数据。新建三个类,如下图:
1、MaxTemperature
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Driver for the max-temperature MapReduce job.
 *
 * <p>The job wires {@link MaxTemperatureMapper} and {@link MaxTemperatureReducer}
 * together, reads from the input path given as the first argument and writes
 * {@code Text -> IntWritable} pairs to the output path given as the second.
 *
 * <p>The class is a {@link Tool}, so it is launched through {@link ToolRunner};
 * this lets Hadoop's generic options (for example {@code -D key=value} or
 * {@code -conf file}) be parsed and applied to the job configuration before
 * {@link #run(String[])} is called.
 */
public class MaxTemperature extends Configured implements Tool {

    /**
     * Command-line entry point. Delegates to {@link ToolRunner} and exits the
     * JVM with the status returned by {@link #run(String[])}.
     *
     * @param args generic Hadoop options followed by {@code <input path> <output path>}
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Configuration(), new MaxTemperature(), args);
        System.exit(exitCode);
    }

    /**
     * Configures and runs the job, blocking until it finishes.
     *
     * @param args the remaining arguments after generic options were stripped;
     *             must be exactly {@code <input path> <output path>}
     * @return {@code 0} if the job succeeded, {@code 1} if it failed,
     *         {@code -1} if the arguments are invalid
     * @throws Exception if the job cannot be created, submitted or monitored
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 2) {
            System.err
                    .println("Usage: MaxTemperature <input path> <output path>");
            return -1;
        }

        // Build the job from the Tool's configuration so that options parsed by
        // ToolRunner actually reach the job (new Job() is deprecated and ignores them).
        Job job = Job.getInstance(getConf());
        job.setJarByClass(MaxTemperature.class);
        job.setJobName("Max temperature");

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(MaxTemperatureMapper.class);
        job.setReducerClass(MaxTemperatureReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Report the wall-clock time spent waiting for the job to complete.
        long startMillis = System.currentTimeMillis();
        System.out.println("开始调用");
        int result = job.waitForCompletion(true) ? 0 : 1;
        System.out.println("调用耗时 "+(System.currentTimeMillis()-startMillis)+" 毫秒");
        return result;
    }
}
2、MaxTemperatureMapper
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that extracts an air-temperature reading from each fixed-width
 * weather record.
 *
 * <p>For every valid record it emits the first six characters of the record
 * date (characters 16-21 of the line, i.e. the {@code yyyyMM} prefix of the
 * 8-character date field) as the key and the air temperature as the value.
 * The temperature is taken from characters 88-92 of the line and is followed
 * by a one-character quality code at position 93.
 */
public class MaxTemperatureMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /** Sentinel value the data set uses when no temperature was recorded. */
    private static final int MISSING = 9999;

    /** Zero-based, end-exclusive bounds of the date prefix used as the key. */
    private static final int KEY_START = 15;
    private static final int KEY_END = 21;

    /** Zero-based offset of the temperature sign and end-exclusive offset of its digits. */
    private static final int TEMP_SIGN = 87;
    private static final int TEMP_END = 92;

    /** Zero-based offset of the temperature quality code. */
    private static final int QUALITY_INDEX = 92;

    /** Shortest line that still contains the quality code. */
    private static final int MIN_RECORD_LENGTH = QUALITY_INDEX + 1;

    /** Quality codes for which a reading is accepted. */
    private static final String ACCEPTED_QUALITY_CODES = "01459";

    /**
     * Parses one input line and emits {@code (date prefix, temperature)} when
     * the reading is present and its quality code is accepted.
     *
     * <p>Lines too short to contain the temperature fields (for example blank
     * or truncated lines) are skipped and counted in the
     * {@code MaxTemperature / SHORT_RECORDS} counter instead of failing the task.
     *
     * @param key     the input key supplied by the input format (unused)
     * @param value   one line of the weather data file
     * @param context the task context used to emit output and update counters
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        if (line.length() < MIN_RECORD_LENGTH) {
            // Fixed-offset access below would throw StringIndexOutOfBoundsException.
            context.getCounter("MaxTemperature", "SHORT_RECORDS").increment(1);
            return;
        }

        String yearMonth = line.substring(KEY_START, KEY_END);

        // Drop a leading '+' before parsing; Integer.parseInt on older JDKs rejects it.
        int digitsStart = (line.charAt(TEMP_SIGN) == '+') ? TEMP_SIGN + 1 : TEMP_SIGN;
        int airTemperature = Integer.parseInt(line.substring(digitsStart, TEMP_END));

        // The quality code is a single character, so a lookup in the accepted
        // set is equivalent to matching the regex [01459] without recompiling it.
        char quality = line.charAt(QUALITY_INDEX);
        if (airTemperature != MISSING && ACCEPTED_QUALITY_CODES.indexOf(quality) >= 0) {
            context.write(new Text(yearMonth), new IntWritable(airTemperature));
        }
    }
}
3、MaxTemperatureReducer
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer that collapses all temperature readings for a key into the single
 * largest reading.
 */
public class MaxTemperatureReducer extends Reducer<Text,IntWritable,Text,IntWritable> {

    /**
     * Scans every value received for {@code key} and writes the maximum.
     *
     * @param key     the grouping key, written through unchanged
     * @param values  the temperature readings collected for this key
     * @param context the task context used to emit the result
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Start below every possible int so the first reading always replaces it.
        int highest = Integer.MIN_VALUE;
        for (IntWritable reading : values) {
            int candidate = reading.get();
            if (candidate > highest) {
                highest = candidate;
            }
        }
        IntWritable result = new IntWritable(highest);
        context.write(key, result);
    }
}
2、运行测试
Run - Run Configurations
运行结果: