1、Eclipse建立Maven工程,由Maven管理jar,打开pom.xml——>添加
<build>
<plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId> <!--maven一个插件,具体查看另一博客-->
<version>2.4.1</version>
<configuration>
<archive>
<manifest>
<mainClass>hadooptest.TestStart</mainClass> <!--执行主函数-->
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef> <!--打包时把依赖jar包打入包中-->
</descriptorRefs>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId> <!--maven管理依赖-->
<artifactId>hadoop-core</artifactId>
<version>1.2.1</version>
</dependency>
</dependencies>
2、新建mapper处理,代码如下
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
/**
 * WordCount mapper (old org.apache.hadoop.mapred API).
 * Splits each input line on whitespace and emits (token, 1) for every token.
 * Input key is the byte offset of the line; it is ignored.
 */
public class TestMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
    // Reused Writable instances: Hadoop serializes values on collect(),
    // so mutating and re-emitting the same objects avoids per-token allocation.
    private final static IntWritable one = new IntWritable(1);
    private final Text word = new Text();

    public void map(LongWritable key, Text value,
            OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException {
        // Tokenize the line on whitespace and emit a count of 1 per token.
        for (StringTokenizer tokens = new StringTokenizer(value.toString());
                tokens.hasMoreTokens(); ) {
            word.set(tokens.nextToken());
            output.collect(word, one);
        }
    }
}
3、新建Reduce处理,代码如下:
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
/**
 * WordCount reducer (old org.apache.hadoop.mapred API).
 * Sums the per-token counts emitted by {@code TestMapper} and writes
 * (token, totalCount) to the job output.
 */
public class TestReduce extends MapReduceBase implements
        Reducer<Text, IntWritable, Text, IntWritable> {

    public void reduce(Text key, Iterator<IntWritable> value,
            OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException {
        // Accumulate every partial count for this key, then emit the total.
        int total = 0;
        for (; value.hasNext(); ) {
            total += value.next().get();
        }
        output.collect(key, new IntWritable(total));
    }
}
4、新建main方法,代码如下
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
/**
 * Job driver for the WordCount example (old org.apache.hadoop.mapred API).
 *
 * NOTE(review): the original post pasted a second copy of {@code TestReduce}
 * here instead of the promised main class — two classes with the same name
 * cannot coexist, and the pom's {@code <mainClass>hadooptest.TestStart</mainClass>}
 * had no matching code. This driver restores the intended entry point.
 *
 * Usage: two arguments — args[0] = input path, args[1] = output path
 * (the output directory must not already exist).
 */
public class TestStart {

    public static void main(String[] args) throws IOException {
        if (args.length != 2) {
            System.err.println("Usage: TestStart <input path> <output path>");
            System.exit(2);
        }

        JobConf conf = new JobConf(TestStart.class);
        conf.setJobName("wordcount");

        // Output key/value types produced by the reducer.
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        // Wire up the mapper/reducer defined in steps 2 and 3.
        conf.setMapperClass(TestMapper.class);
        conf.setReducerClass(TestReduce.class);

        // Plain-text line input, tab-separated text output.
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        // Submit the job and block until it completes.
        JobClient.runJob(conf);
    }
}
5、所有ok后,在当前工程下,执行命令mvn assembly:assembly。在target下面找到jar包
6、拷贝生成的jar到,运行的hadoop服务器下。启动hadoop
执行 hadoop jar XXX.jar HDFS输入路径 HDFS输出路径(注意:输出路径必须事先不存在,否则作业会报错)