看了Hadoop权威指南第三版上面的计算最大温度的例子之后自己动手实践了一遍。下面是过程:
首先是数据,我只用了两个年份两个TXT文本:
接下来是MaxTemperatureMapper.java代码:
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that extracts a (year, air temperature) pair from each line of an
 * NCDC weather record. Input key is the byte offset of the line (unused);
 * input value is the raw record text.
 */
public class MaxTemperatureMapper extends
Mapper<LongWritable,Text,Text,IntWritable>
{
// NCDC encodes a missing temperature reading as 9999.
private static final int MISSING = 9999;
@Override
public void map(LongWritable key,Text value,Context context)
throws IOException,InterruptedException
{
String line = value.toString();
// Skip malformed/short records; the fixed-width fields below require at
// least 93 characters, and a truncated line would otherwise throw
// StringIndexOutOfBoundsException and fail the whole task.
if(line.length() < 93)
{
return;
}
// Year occupies columns 15-18 of the fixed-width record.
String year = line.substring(15,19);
int airTemperature;
if(line.charAt(87) == '+')
{
// Integer.parseInt does not accept a leading '+', so skip it.
airTemperature = Integer.parseInt(line.substring(88,92));
}
else
{
airTemperature = Integer.parseInt(line.substring(87,92));
}
String quality = line.substring(92,93);
// Emit only readings that are present and carry a valid quality code.
if(airTemperature != MISSING && quality.matches("[01459]"))
{
context.write(new Text(year),new IntWritable(airTemperature));
}
}
}
接下来是MaxTemperatureReducer.java代码:
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class MaxTemperatureReducer extends
Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
int maxValue = Integer.MIN_VALUE;
for (IntWritable value : values) {
maxValue = Math.max(maxValue, value.get());
}
context.write(key, new IntWritable(maxValue));
}
}
接下来是主函数MaxTemperature.java代码:
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for the max-temperature MapReduce job.
 * Usage: MaxTemperature &lt;input path&gt; &lt;output path&gt;
 */
public class MaxTemperature {
public static void main(String[] args) throws Exception {
if (args.length != 2) {
System.err
.println("Usage: MaxTemperature <input path> <output path>");
System.exit(-1);
}
// Job.getInstance() replaces the deprecated new Job() constructor.
Job job = Job.getInstance();
job.setJarByClass(MaxTemperature.class);
job.setJobName("Max temperature");
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setMapperClass(MaxTemperatureMapper.class);
// Max is associative and commutative, so the reducer can safely double
// as a combiner: same results, less data shuffled across the network.
job.setCombinerClass(MaxTemperatureReducer.class);
job.setReducerClass(MaxTemperatureReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
接下来就是打包需要的hadoop的jar包,此程序需要的包有:
$HADOOP_HOME/share/hadoop/common/hadoop-common-2.6.0.jar
$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.6.0.jar
$HADOOP_HOME/share/hadoop/common/lib/commons-cli-1.2.jar
接下来是把所需的jar包加到路径:
export CLASSPATH="$HADOOP_HOME/share/hadoop/common/hadoop-common-2.6.0.jar:$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.6.0.jar:$HADOOP_HOME/share/hadoop/common/lib/commons-cli-1.2.jar:$CLASSPATH"
接下来就是编译代码了:
javac MaxTemperature.java
之后,会生成对应的class文件。如图:
接下来,把class文件打包成jar包,把测试的年份数据文件put到hdfs中。
jar -cvf MaxTemperature.jar ./Max*.class
bin/hadoop fs -put /opt/hadoopTestSample/input/ /MaxTemInput
然后,运行程序:
bin/hadoop jar myclass/MaxTemperature.jar MaxTemperature /MaxTemInput /output/MaxTem1
查看结果:
bin/hadoop fs -cat /output/MaxTem1/*
结果如图:
好了,一个小小程序终于测试成功。
参考文章:
http://www.powerxing.com/hadoop-build-project-by-shell/