1 任务
分析历年最高气温
数据格式(NCDC 定长记录,下标从 0 开始:第 15–18 位为年份,第 87 位为气温符号,第 88–91 位为气温数值,9999 表示缺测):
0043011990999991950051512004+68750+023550FM-12+038299999V0203201N00671220001CN9999999N9+00221+99999999999
2 编写Mapper类
package weather;
import GroupingComparatorOrder.GroupingComparatorPartitioner;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper that turns one fixed-width weather record into a (year, temperature) pair.
 *
 * <p>The year is read from characters 15-18, the temperature sign from character 87
 * and the temperature magnitude from characters 88-91 (all 0-based). Records that are
 * too short, whose magnitude is not numeric, or whose magnitude equals the
 * missing-value sentinel are skipped and counted instead of failing the task or
 * polluting the per-year maximum.
 */
public class weatherMap extends Mapper<LongWritable, Text, Text, IntWritable>{
    /** 0-based, end-exclusive offsets of the fields inside a record line. */
    private static final int YEAR_START = 15;
    private static final int YEAR_END = 19;
    private static final int SIGN_INDEX = 87;
    private static final int TEMPERATURE_START = 88;
    private static final int TEMPERATURE_END = 92;

    /**
     * Magnitude used by the data set to mark a missing reading.
     * NOTE(review): taken from the NCDC/ISD record layout - confirm against the data source.
     */
    private static final int MISSING_TEMPERATURE = 9999;

    /** Counter group under which skipped records are reported. */
    private static final String COUNTER_GROUP = "weather";

    // Output key/value holders, reused for every record handled by this mapper instance.
    Text k = new Text();
    IntWritable v = new IntWritable();

    /**
     * Parses one input line and emits (year, signed temperature).
     *
     * @param key     offset of the line in the input split (unused)
     * @param value   one raw record line
     * @param context task context used to emit the pair and to bump skip counters
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();

        // A line shorter than the temperature field cannot be parsed; skip it rather than crash.
        if (line.length() < TEMPERATURE_END) {
            context.getCounter(COUNTER_GROUP, "MALFORMED_RECORDS").increment(1);
            return;
        }

        int magnitude;
        try {
            magnitude = Integer.parseInt(line.substring(TEMPERATURE_START, TEMPERATURE_END));
        } catch (NumberFormatException e) {
            // Non-numeric temperature field: count it and move on to the next record.
            context.getCounter(COUNTER_GROUP, "MALFORMED_RECORDS").increment(1);
            return;
        }

        // A missing reading must not take part in the maximum computation.
        if (magnitude == MISSING_TEMPERATURE) {
            context.getCounter(COUNTER_GROUP, "MISSING_TEMPERATURE").increment(1);
            return;
        }

        // Any sign character other than '+' is treated as negative, as before.
        int temperature = line.charAt(SIGN_INDEX) == '+' ? magnitude : -magnitude;

        k.set(line.substring(YEAR_START, YEAR_END));
        v.set(temperature);
        context.write(k, v);
    }
}
3 编写Reducer类
package weather;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer that emits the highest temperature seen for each year.
 */
public class weatherReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    /**
     * Scans all temperatures of one year and writes (year, maximum).
     *
     * @param key     the year
     * @param values  every temperature emitted by the mappers for that year
     * @param context task context used to emit the result
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Start from the smallest int so that a year with only negative readings
        // reports its real maximum instead of a fabricated 0.
        int max = Integer.MIN_VALUE;
        for (IntWritable value : values) {
            max = Math.max(max, value.get());
        }
        context.write(key, new IntWritable(max));
    }
}
4 编写Job(主函数)
package weather;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.net.URI;
import java.net.URISyntaxException;
/**
 * Driver that configures and submits the "weather" MapReduce job.
 *
 * <p>It removes a pre-existing output directory, wires {@code weatherMap} and
 * {@code weatherReduce} into the job, waits for completion and reports the outcome.
 */
public class weatherJob {
    /**
     * Submits the job and blocks until it finishes.
     *
     * <p>On failure the process exits with status 1 so that calling scripts can
     * detect the error; on success the method returns normally.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the HDFS client cannot be created or job submission fails
     */
    public static void main(String[] args) throws Exception {
        // Run all Hadoop client calls as the "root" user.
        System.setProperty("HADOOP_USER_NAME","root");

        // Cluster connection settings.
        Configuration conf = new Configuration();
        conf.set("mapreduce.framework.name","yarn");
        conf.set("yarn.resourcemanager.hostname","master");
        conf.set("fs.defaultFS","hdfs://master:9000");
        // Allow submitting from a different client platform than the cluster's.
        conf.set("mapreduce.app-submission.cross-platform","true");

        Path output = new Path("/weather/output");
        boolean succeeded;

        // try-with-resources releases the HDFS client handle even when submission throws.
        try (FileSystem fs = FileSystem.get(new URI("hdfs://master:9000"),conf,"root")) {
            // The job refuses to start if the output directory already exists, so drop it first.
            if (fs.exists(output)) {
                fs.delete(output,true);
            }

            Job job = Job.getInstance(conf,"weather");
            job.setJar("E:\\Hadoop\\HDFSDemo\\target\\HDFSDemo-1.0-SNAPSHOT.jar");
            job.setMapperClass(weatherMap.class);
            job.setReducerClass(weatherReduce.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            // Input and output locations on HDFS.
            FileInputFormat.setInputPaths(job,new Path("/weather/input"));
            FileOutputFormat.setOutputPath(job,output);

            // Submit and wait, printing progress to the console.
            succeeded = job.waitForCompletion(true);
        }

        if (succeeded) {
            System.out.println("分析任务完成!");
        } else {
            System.out.println("分析任务失败!");
            // Propagate the failure to the caller through the process exit status.
            System.exit(1);
        }
    }
}
结果:
1949 22
1950 111