----------------------------Notes------------------------------
Compared with HelloHadoopV1, this program adds the following:
Check whether output files already exist in the output folder and delete them if so
If the input folder contains more than two files, those files will not be overwritten
Map and reduce are split into separate classes so that the functions can be reused
---------------------------------------------------------------
HelloMapperV2.java
package HelloHadoopV2;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class HelloMapperV2 extends Mapper<LongWritable, Text, Text, Text> {

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Emit each input line keyed by its byte offset (converted to Text).
        context.write(new Text(key.toString()), value);
    }
}
HelloReducerV2.java
package HelloHadoopV2;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class HelloReducerV2 extends Reducer<Text, Text, Text, Text> {

    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        String str = "";
        Text final_key = new Text();
        Text final_value = new Text();
        // Join the values that share the same key, separated by the && symbol.
        for (Text tmp : values) {
            str += tmp.toString() + " &&";
        }
        final_key.set(key);
        final_value.set(str);
        context.write(final_key, final_value);
    }
}
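As a concrete illustration of the reducer (ignoring the combiner stage): the mapper keys each line by its byte offset, so a hypothetical first line such as hello hadoop is emitted as the pair ("0", "hello hadoop"). If a second input file also has a line starting at offset 0, say hello world, both values arrive at the reducer under the key "0", and the output line (key and value separated by a tab, values in no particular order) would look like:

0	hello hadoop && hello world &&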
HelloHadoopV2.java
package HelloHadoopV2;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import CheckAndDelete.CheckAndDelete;
public class HelloHadoopV2 {

    /**
     * @param args
     * @throws IOException
     * @throws ClassNotFoundException
     * @throws InterruptedException
     */
    public static void main(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "Hadoop Hello World 2");
        job.setJarByClass(HelloHadoopV2.class);
        // Set the mapper, combiner, and reducer classes.
        job.setMapperClass(HelloMapperV2.class);
        job.setCombinerClass(HelloReducerV2.class);
        job.setReducerClass(HelloReducerV2.class);
        // Set the map output key/value types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // Set the reduce output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // Set the input path.
        FileInputFormat.addInputPath(job, new Path("/user/hadoop/input"));
        // Set the output path.
        FileOutputFormat.setOutputPath(job, new Path("/user/hadoop/output-hh2"));
        // Call checkAndDelete: if the output folder already exists, delete it first.
        CheckAndDelete.checkAndDelete("/user/hadoop/output-hh2", conf);
        boolean status = job.waitForCompletion(true);
        if (status) {
            System.err.println("Integrate Alert Job Finished !");
        } else {
            System.err.println("Integrate Alert Job Failed");
            System.exit(1);
        }
    }
}
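The CheckAndDelete class imported above is not listed in this post. A minimal sketch of what CheckAndDelete.checkAndDelete might look like, assuming it simply removes the given HDFS path when it already exists, is:

CheckAndDelete.java (sketch)
package CheckAndDelete;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CheckAndDelete {

    // If the given path already exists on the file system, delete it recursively
    // so the next job run does not fail with "output directory already exists".
    public static void checkAndDelete(String out, Configuration conf) throws IOException {
        Path dstPath = new Path(out);
        FileSystem fs = dstPath.getFileSystem(conf);
        if (fs.exists(dstPath)) {
            fs.delete(dstPath, true);
        }
    }
}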
How to test:
1. Start Hadoop.
2. Package this code as HelloHadoop.jar and copy it to the Hadoop directory.
3. Run it following the steps at http://freewxy.iteye.com/blog/1102011 (example commands below).
4. Check the results:
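Assuming the hadoop command is on the PATH, the jar is named HelloHadoop.jar as above, and the input files have already been uploaded to /user/hadoop/input, the run and verification steps would look roughly like this:

hadoop jar HelloHadoop.jar HelloHadoopV2.HelloHadoopV2
hadoop fs -ls /user/hadoop/output-hh2
hadoop fs -cat /user/hadoop/output-hh2/part-r-00000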