Hadoop Notes for Beginners (3): Using Eclipse to package WordCount into a jar runnable on Hadoop

Hadoop version: hadoop-1.2.1

Eclipse version: eclipse-standard-kepler-SR2-win32-x86_64

WordCount.java is taken from hadoop-1.2.1\src\examples\org\apache\hadoop\examples\WordCount.java
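
As an aside, the examples jar that ships with the release can already run this class, which is a handy sanity check on the cluster before building your own jar (wordcount is the lowercase driver name registered in the examples jar; input and output are HDFS paths you supply):

hadoop jar hadoop-examples-1.2.1.jar wordcount input output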

/**
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package org.apache.hadoop.examples;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

  public static class TokenizerMapper 
       extends Mapper<Object, Text, Text, IntWritable>{
    
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();
      
    public void map(Object key, Text value, Context context
                    ) throws IOException, InterruptedException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
      }
    }
  }
  
  public static class IntSumReducer 
       extends Reducer<Text,IntWritable,Text,IntWritable> {
    private IntWritable result = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values, 
                       Context context
                       ) throws IOException, InterruptedException {
      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get();
      }
      result.set(sum);
      context.write(key, result);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: wordcount <in> <out>");
      System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

In Eclipse, create a new Java project named WordCount.

In the project, create a new class, also named WordCount.

Overwrite the generated WordCount.java with the code above,

and change the package declaration at the top to wordcount. The modified source is as follows:

package wordcount;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

  public static class TokenizerMapper 
       extends Mapper<Object, Text, Text, IntWritable>{
    
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();
      
    public void map(Object key, Text value, Context context
                    ) throws IOException, InterruptedException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
      }
    }
  }
  
  public static class IntSumReducer 
       extends Reducer<Text,IntWritable,Text,IntWritable> {
    private IntWritable result = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values, 
                       Context context
                       ) throws IOException, InterruptedException {
      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get();
      }
      result.set(sum);
      context.write(key, result);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: wordcount <in> <out>");
      System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

 

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

As you can see, the source imports quite a few Hadoop classes that are not part of the JDK. These dependencies have to be added to the Eclipse build path, otherwise the compiler has no way of finding the classes; it must be told explicitly where they live.

If you compile and run at this point, you will hit errors like the following:

Exception in thread "main" java.lang.Error: Unresolved compilation problems: 
    The import org.apache.commons cannot be resolved
    The import org.apache.commons cannot be resolved
    The import org.codehaus cannot be resolved
    The import org.codehaus cannot be resolved
    Log cannot be resolved to a type
    LogFactory cannot be resolved
    ... ("Log cannot be resolved to a type" repeated many more times) ...
    JsonFactory cannot be resolved to a type
    JsonFactory cannot be resolved to a type
    JsonGenerator cannot be resolved to a type

    at org.apache.hadoop.conf.Configuration.<init>(Configuration.java:60)
    at wordcount.WordCount.main(WordCount.java:52)

The cause is missing dependency jars; adding them to the build path fixes it.

Use Add External JARs to add every jar file under hadoop-1.2.1\lib (and, if it is not on the build path already, hadoop-core-1.2.1.jar from the Hadoop root directory, which contains the org.apache.hadoop classes themselves).
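
For reference, the same dependency resolution can be done outside Eclipse by putting the jars on javac's classpath. A minimal sketch, run on the Hadoop machine, assuming the source was saved as src/wordcount/WordCount.java and HADOOP_HOME points at the hadoop-1.2.1 install (both paths are assumptions about your layout):

mkdir -p classes
javac -classpath $HADOOP_HOME/hadoop-core-1.2.1.jar -d classes src/wordcount/WordCount.java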

Compile and run again; this time it succeeds.

Finally, package it into a jar file:

File -> Export -> Java -> JAR file

The jar does not have to be named after the class; WordCount.jar could just as well be CountWord.jar, it makes little difference. Pick a name and click Finish.
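
If you prefer the command line, the packaging step is a single jar command. A sketch, assuming the compiled .class files landed in the classes directory used above:

jar -cvf WordCount.jar -C classes/ .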

The jar can now be run on Hadoop. For a detailed walkthrough of running WordCount, see Hadoop集群(第6期)_WordCount运行详解:

hadoop jar WordCount.jar wordcount.WordCount input output

Note that the fully qualified name wordcount.WordCount is required here. The code above no longer uses

package org.apache.hadoop.examples;

but it does declare package wordcount;, and as soon as a class lives in a package the jar contains a matching directory hierarchy, so a bare hadoop jar WordCount.jar WordCount input output will not find the main class; the main class has to be spelled out in full. Had we kept the original package, the command would instead be

hadoop jar WordCount.jar org.apache.hadoop.examples.WordCount input output
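
One more option: hadoop jar honors the Main-Class attribute in the jar's manifest, so if you select the main class on the last page of the Eclipse export wizard (or write it into the manifest yourself), the class name can be dropped from the command entirely:

hadoop jar WordCount.jar input output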

The packaging steps here are also covered in [hadoop]命令行编译并运行hadoop例子WordCount.
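
Putting it all together, an end-to-end run might look like this; a sketch assuming two local text files file1.txt and file2.txt as input (the HDFS paths are relative to the user's home directory, and part-r-00000 is the reducer's output file):

hadoop fs -mkdir input
hadoop fs -put file1.txt file2.txt input
hadoop jar WordCount.jar wordcount.WordCount input output
hadoop fs -cat output/part-r-00000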



Reposted from: 林羽飞扬
