Java中使用MapReduce实现WordCount
Map端
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper of the word-count job.
 *
 * <p>Each input value is one line of text. The line is split on runs of
 * whitespace and a {@code (word, 1)} pair is written for every non-empty token.
 */
public class WordCountMap extends Mapper<LongWritable, Text, Text, IntWritable>
{
    /** The count written with every word; it is never modified, so one shared instance suffices. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Output key object, refilled for every token instead of allocating a new Text each time. */
    Text outPutKey = new Text();

    /**
     * Emits {@code (word, 1)} for every whitespace-separated word of the line.
     *
     * @param key     input key supplied by the framework; not used here
     * @param value   the line of text to tokenize
     * @param context sink the {@code (word, 1)} pairs are written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
    {
        String[] words = value.toString().split("\\s+");
        for (String word : words)
        {
            // split() returns an empty first token for an empty line or a line that
            // starts with whitespace; that token is not a word and must not be counted.
            if (word.isEmpty())
            {
                continue;
            }
            outPutKey.set(word);
            context.write(outPutKey, ONE);
        }
    }
}
Combiner端(可选)
Combiner端的代码与Reduce端相同(可直接复用同一个类),其作用是在Map端先对输出做一次本地聚合,
从而减少传输到Reduce端的数据量,降低网络带宽占用和Reduce端的负载
Reduce端
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.Iterator;
/**
 * Reducer of the word-count job: adds up every count received for a word
 * and writes a single {@code (word, total)} pair.
 */
public class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable>
{
    // Holder for the emitted total; the same object is refilled on every reduce() call.
    IntWritable outPutValue = new IntWritable();

    /**
     * Sums the counts of one word and writes the total.
     *
     * @param key     the word
     * @param values  the counts collected for that word
     * @param context sink the {@code (word, total)} pair is written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException
    {
        int total = 0;
        for (IntWritable count : values)
        {
            total = total + count.get();
        }

        outPutValue.set(total);
        context.write(key, outPutValue);
    }
}
Driver端
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Entry point that configures and submits the word-count MapReduce job.
 *
 * <p>Usage: {@code WordCountDriver [inputPath [outputPath]]}. When an argument
 * is omitted the corresponding default path below is used.
 */
public class WordCountDriver
{
    /** Input path used when no first argument is given. */
    private static final String DEFAULT_INPUT = "/test/wc.txt";

    /** Output directory used when no second argument is given. */
    private static final String DEFAULT_OUTPUT = "/test/output";

    /**
     * Builds the job, removes a pre-existing output directory, runs the job and
     * exits with status 0 on success or 1 on failure.
     *
     * @param args optional: {@code args[0]} input path, {@code args[1]} output directory
     * @throws IOException            if the file system or job submission fails
     * @throws ClassNotFoundException if a job class cannot be found during submission
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException
    {
        Path input = new Path(args.length > 0 ? args[0] : DEFAULT_INPUT);
        Path output = new Path(args.length > 1 ? args[1] : DEFAULT_OUTPUT);

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJobName("wordcount");
        job.setJarByClass(WordCountDriver.class);

        job.setMapperClass(WordCountMap.class);
        // The combiner performs exactly the same summation as the reducer and has
        // identical input/output types, so the reducer class is registered for both roles.
        job.setCombinerClass(WordCountReduce.class);
        job.setReducerClass(WordCountReduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, input);

        // The job refuses to start if the output directory already exists, so remove it first.
        // A private FileSystem instance is used here: FileSystem.get(conf) hands out a shared
        // cached instance, and closing that one before waitForCompletion() can make job
        // submission fail with "Filesystem closed".
        try (FileSystem fs = FileSystem.newInstance(conf))
        {
            if (fs.exists(output))
            {
                fs.delete(output, true);
            }
        }
        FileOutputFormat.setOutputPath(job, output);

        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
备注
-
输入文件可以是任意内容的文本文件,但单词之间需要以空白字符(如空格)分隔,例如:
![在这里插入图片描述](https://img-blog.csdnimg.cn/2019121515431159.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzQwNTc5NDY0,size_16,color_FFFFFF,t_70#pic_center)
-
若Driver端的Configuration对象没有在代码中手动设置集群相关参数,则需要提前将Hadoop的配置文件拷贝到项目的resources目录中
![在这里插入图片描述](https://img-blog.csdnimg.cn/20191215154442792.png#pic_center)