In "Writing your own WordCount program on Hadoop 1.x" we wrote a WordCount program, and the previous article covered integrating Eclipse with Hadoop. This article uses that integrated development environment to test the program.
1. Create a Map/Reduce project
Create it following the steps in the previous article; the required Hadoop jars are imported automatically.
2. Write the WordCount program
For simplicity, copy in the code written earlier and hard-code the input and output directories, as shown below:
package org.dragon.hadoop.mr;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * MapReduce example: the WordCount program
 * @author Administrator
 *
 */
public class MyWordCount {

    // Mapper section
    /**
     * The map class of the WordCount program.
     * KEYIN    input key type    -- byte offset of the start of the line
     * VALUEIN  input value type  -- the line of text
     * KEYOUT   output key type   -- the word
     * VALUEOUT output value type -- the count for that word
     */
    static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private Text word = new Text();
        private final static IntWritable one = new IntWritable(1);

        // map() is invoked once for each line of input
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Get the text of the current line
            String lineContent = value.toString();
            // Split the line; the default delimiters are " \t\n\r\f"
            StringTokenizer stringTokenizer = new StringTokenizer(lineContent);
            // Iterate over the tokens
            while (stringTokenizer.hasMoreTokens()) {
                // Get each word
                String wordValue = stringTokenizer.nextToken();
                // Set the map output key
                word.set(wordValue);
                // Write the key/value pair to the context
                context.write(word, one);
            }
        }
    }

    // Reducer section
    /**
     * The reduce class of the WordCount program.
     * The map output types are the reduce input types.
     */
    static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Running total used to count how many times the key occurs
            int sum = 0;
            // Iterate over the Iterable
            for (IntWritable value : values) {
                // Accumulate
                sum += value.get();
            }
            // Set the total number of occurrences for this key
            result.set(sum);
            context.write(key, result);
        }
    }

    // Client section
    public static void main(String[] args) throws Exception {
        // Assign the input parameters
        args = new String[]{
                "hdfs://hadoop-master.dragon.org:9000/opt/data/test/input/",
                "hdfs://hadoop-master.dragon.org:9000/opt/data/test/output2/"
        };
        // Get the HDFS configuration
        Configuration conf = new Configuration();
        /*************************** improvement start ***************************/
        // Parse generic options and keep only the remaining arguments
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.print("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        /*************************** improvement end *****************************/
        // Create the Job, setting the configuration and the job name
        Job job = new Job(conf, "myjob");
        // Set the class whose jar will be used to run the job
        job.setJarByClass(MyWordCount.class);
        // Set the Mapper and Reducer classes
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        // Set the input and output directories (passed in at run time)
        // FileInputFormat.addInputPath(job, new Path(args[0]));
        // FileOutputFormat.setOutputPath(job, new Path(args[1]));
        /*************************** improved call start *************************/
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        /*************************** improved call end ***************************/
        // Set the output key and value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Submit the job, wait for completion, and print progress on the client
        boolean isSuccess = job.waitForCompletion(true);
        // Exit the program
        System.exit(isSuccess ? 0 : 1);
    }
}
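To make the data flow concrete, here is a small walk-through (the sample input is made up for illustration). Suppose the input directory contains one file with the two lines:

hello world
hello hadoop

The mapper emits (hello,1), (world,1), (hello,1), (hadoop,1). The shuffle phase groups the values by key, and the reducer sums each group, so the output file part-r-00000 in the output directory contains:

hadoop	1
hello	2
world	1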
3. Run the program
Right-click the class and choose Run As -> Run on Hadoop, select the server, and click Finish. The following error is reported:
16/03/21 20:36:23 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
16/03/21 20:36:23 ERROR security.UserGroupInformation: PriviledgedActionException as:Administrator cause:java.io.IOException: Failed to set permissions of path: \opt\data\tmp\mapred\staging\Administrator2085519810\.staging to 0700
Exception in thread "main" java.io.IOException: Failed to set permissions of path: \opt\data\tmp\mapred\staging\Administrator2085519810\.staging to 0700
at org.apache.hadoop.fs.FileUtil.checkReturnValue(FileUtil.java:691)
at org.apache.hadoop.fs.FileUtil.setPermission(FileUtil.java:664)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:514)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:349)
at org.apache.hadoop.fs.FilterFileSystem.mkdirs(FilterFileSystem.java:193)
at org.apache.hadoop.mapreduce.JobSubmissionFiles.getStagingDir(JobSubmissionFiles.java:126)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:942)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:936)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:936)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:550)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:580)
at org.dragon.hadoop.mr.MyWordCount.main(MyWordCount.java:123)
That is, a PriviledgedActionException is thrown. The Windows-style path in the message points to the root cause: when the job is submitted from Windows, Hadoop 1.x tries to set POSIX permissions (0700) on the local staging directory, and the check fails because the local file system on Windows does not support them.
4. Fix the PriviledgedActionException
The workaround is to override the body of the checkReturnValue method in org.apache.hadoop.fs.FileUtil. To avoid modifying the Hadoop jar itself, create the same package, org.apache.hadoop.fs, and a class with the same name, FileUtil, in the project, copy the original source code into it, and comment out the body of that method, as shown below:
private static void checkReturnValue(boolean rv, File p,
                                     FsPermission permission) throws IOException {
    /* if (!rv) {
        throw new IOException("Failed to set permissions of path: " + p +
                " to " +
                String.format("%04o", permission.toShort()));
    } */
}
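For completeness, a minimal sketch of how the shadowing class is laid out in the project (everything except the emptied method must be copied verbatim from the Hadoop 1.x FileUtil source; the file path below assumes a standard Eclipse src folder):

// src/org/apache/hadoop/fs/FileUtil.java
// Because this class has the same fully qualified name as the one in
// hadoop-core.jar, and the project's own classes come before the referenced
// jars on the Eclipse classpath, this copy shadows the original at run time.
package org.apache.hadoop.fs;

import java.io.File;
import java.io.IOException;
import org.apache.hadoop.fs.permission.FsPermission;

public class FileUtil {

    // ... all other fields and methods copied unchanged from the original source ...

    private static void checkReturnValue(boolean rv, File p,
                                         FsPermission permission) throws IOException {
        // Body commented out: skip the POSIX permission check that fails on Windows
    }
}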
Re-run the program and it completes successfully.