气温数据:
1997-07-01 10
1998-04-01 26
1997-05-01 -5
1989-06-01 25
1925-05-01 45
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * MapReduce job that reports, for every year, the day with the highest temperature.
 *
 * <p>Each input line is expected to hold a date followed by a temperature, separated by
 * whitespace, e.g. {@code 1997-07-01 10}. The mapper groups the raw lines by the first four
 * characters of the date (the year); the reducer keeps the record with the highest temperature
 * and emits {@code <year> <date> <temperature>}.
 *
 * <p>Usage: {@code MaxTemp <input path> <output path>}. An existing output path is deleted
 * before the job is submitted.
 */
public class MaxTemp {

    // Required for running locally: points Hadoop at its installation directory.
    static {
        System.setProperty("hadoop.home.dir", "D:\\soft\\hadoop\\hadoop-2.9.2");
    }

    /**
     * Emits {@code (year, original line)} for every well-formed input line.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

        /**
         * @param key     input key supplied by the framework (not used)
         * @param value   one line of input, {@code <date> <temperature>}
         * @param context sink for the {@code (year, line)} pair
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Trim and split on runs of whitespace so that leading blanks or repeated
            // separators do not produce empty fields.
            String[] lineArr = value.toString().trim().split("\\s+");
            // Skip blank or truncated lines instead of failing in substring().
            if (lineArr.length < 2 || lineArr[0].length() < 4) {
                return;
            }
            // The year is the first four characters of the date.
            String year = lineArr[0].substring(0, 4);
            context.write(new Text(year), value);
        }
    }

    /**
     * Picks, among all records of one year, the one with the highest temperature.
     */
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {

        /**
         * @param key     the year
         * @param values  all input lines ({@code <date> <temperature>}) of that year
         * @param context sink for the {@code (year, "<date> <max temperature>")} pair
         * @throws NumberFormatException if a temperature field is not a valid number
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Start from negative infinity: Double.MIN_VALUE is the smallest POSITIVE double,
            // so using it would ignore every temperature that is zero or below.
            double maxTemp = Double.NEGATIVE_INFINITY;
            String day = null;
            for (Text value : values) {
                // Split the record into the date and its temperature.
                String[] split = value.toString().trim().split("\\s+");
                double temp = Double.parseDouble(split[1]);
                // The first record is always accepted so that a date is recorded even when
                // the comparison alone would not succeed.
                if (day == null || temp > maxTemp) {
                    maxTemp = temp;
                    day = split[0];
                }
            }
            // Emit the hottest day of the year together with its temperature.
            context.write(key, new Text(day + " " + maxTemp));
        }
    }

    /**
     * Configures and runs the job, then prints {@code 1} on success or {@code 0} on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length < 2) {
            System.err.println("Usage: MaxTemp <input path> <output path>");
            System.exit(2);
        }
        Path inputPath = new Path(args[0]);
        Path outputPath = new Path(args[1]);

        // 0. Initialise the job.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "maxTemp");
        // Lets Hadoop locate the jar containing this class when the job is packaged and
        // run on a cluster.
        job.setJarByClass(MaxTemp.class);

        // 1. Input file.
        FileInputFormat.addInputPath(job, inputPath);

        // 2. Map phase. The map output key/value classes are not set explicitly because
        //    they are the same as the job output classes configured below.
        job.setMapperClass(MyMapper.class);

        // 3. Shuffle is handled by the framework.

        // 4. Reduce phase.
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // 5. Output file.
        FileOutputFormat.setOutputPath(job, outputPath);
        // Delete the output directory if it already exists. The file system is resolved
        // from the path itself so that a path on a non-default file system also works.
        FileSystem fs = outputPath.getFileSystem(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }

        // 6. Submit the job and wait for it to finish.
        boolean result = job.waitForCompletion(true);
        System.out.println(result ? 1 : 0);
    }
}
结果:
1925 1925-05-01 45.0
1989 1989-06-01 25.0
1997 1997-07-01 10.0
1998 1998-04-01 26.0