package lxkj.com.hadoop_02;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;;
//Mapper is a generic class whose four type parameters are (input key [long byte offset], input value [one line of text], output key, output value). These are Hadoop's serialization-optimized types from the org.apache.hadoop.io package.
public class MaxTemperatureMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
private static final int MISSING=9999;
public void map(LongWritable key, Text value, Context context )
throws IOException, InterruptedException {
//将输入的Text值转换为java的String类型
String line=value.toString();
//用substring()方法提取我们感兴趣的列
String year=line.substring(15, 19);
int airTemperature;
if(line.charAt(87)=='+'){
airTemperature=Integer.parseInt(line.substring(88, 92));
}else{
airTemperature=Integer.parseInt(line.substring(87, 92));
}
String quality=line.substring(92, 93);
if(airTemperature!=MISSING&&quality.matches("[01459]")){
//输出写入内容
context.write(new Text(year), new IntWritable(airTemperature));
}
}
public class MaxTemperatureMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
private static final int MISSING=9999;
public void map(LongWritable key, Text value, Context context )
throws IOException, InterruptedException {
//将输入的Text值转换为java的String类型
String line=value.toString();
//用substring()方法提取我们感兴趣的列
String year=line.substring(15, 19);
int airTemperature;
if(line.charAt(87)=='+'){
airTemperature=Integer.parseInt(line.substring(88, 92));
}else{
airTemperature=Integer.parseInt(line.substring(87, 92));
}
String quality=line.substring(92, 93);
if(airTemperature!=MISSING&&quality.matches("[01459]")){
//输出写入内容
context.write(new Text(year), new IntWritable(airTemperature));
}
}
}
package lxkj.com.hadoop_02;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
//The reducer likewise has four type parameters for its input and output types; the reduce input types must match the map output types.
public class MaxTemperatureReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Emits the maximum temperature observed for a single year.
     *
     * @param key     the year
     * @param values  every temperature recorded for that year (Iterable, like a List)
     * @param context sink for the (year, maxTemperature) output pair
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
            Reducer<Text, IntWritable, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        int max = Integer.MIN_VALUE;
        for (IntWritable reading : values) {
            int t = reading.get();
            if (t > max) {
                max = t;
            }
        }
        // Output must use Hadoop's built-in writable types.
        context.write(key, new IntWritable(max));
    }
}
// NOTE(review): removed a copy-paste duplicate of MaxTemperatureReduce that was
// pasted here (together with stray mid-file import statements). Java forbids a
// second top-level class with the same name, so the duplicate could never compile;
// the canonical definition above is the one in use.
package lxkj.com.hadoop_02;
import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
* Hello world!
*
*/
public class App
{
    /**
     * Driver for the max-temperature MapReduce job.
     *
     * <p>Usage: {@code MaxTemperature <input path> <output path>}
     */
    public static void main( String[] args ) throws IOException, ClassNotFoundException, InterruptedException
    {
        if (args.length != 2) {
            // Usage errors belong on stderr, not stdout.
            System.err.println("Usage: MaxTemperature <input path> <output path>");
            System.exit(-1);
        }
        // The Job object defines the job specification and controls its execution.
        // Job.getInstance() replaces the deprecated new Job() constructor.
        Job job = Job.getInstance();
        // On a cluster the code ships as a jar; setJarByClass lets Hadoop locate
        // that jar from any class it contains, without naming the jar explicitly.
        job.setJarByClass(App.class);
        // Human-readable job name shown in the cluster UI.
        job.setJobName("Max temperature");
        // Input path (a file or a directory of input files; may be called repeatedly).
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // Output directory — exactly one, and it must not already exist.
        // BUG FIX: the original registered args[1] as a second INPUT path.
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Wire up the mapper and reducer implementations.
        job.setMapperClass(MaxTemperatureMapper.class);
        job.setReducerClass(MaxTemperatureReduce.class);
        // Output types of the reduce function.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Block until the job finishes; exit 0 on success, 1 on failure.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
// NOTE(review): removed a copy-paste duplicate of the App driver class (and the
// dangling tail of its javadoc comment) that was pasted here. Java forbids a second
// top-level class with the same name, so the duplicate could never compile; the
// canonical definition above is the one in use.