package cn.tedu.wordcount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver that configures and submits the word-count MapReduce job.
 *
 * <p>Usage: {@code WCDriver [inputPath [outputPath]]}. Any path that is not supplied on the
 * command line falls back to the built-in default location.
 */
public class WCDriver {
    /** Input path used when no first command-line argument is given. */
    private static final String DEFAULT_INPUT_PATH = "hdfs://192.168.157.136:9000/wc/words.txt";

    /** Output path used when no second command-line argument is given. */
    private static final String DEFAULT_OUTPUT_PATH = "hdfs://192.168.157.136:9000/wcresult";

    /**
     * Builds the job, runs it, and terminates the JVM with a status reflecting the outcome.
     *
     * @param args optional {@code inputPath} and {@code outputPath}, in that order
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        // 1. Create the job.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // 2. Declare the job's entry class.
        job.setJarByClass(WCDriver.class);

        // 3. Declare the Mapper and its output types.
        job.setMapperClass(WCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // 4. Declare the Reducer and the final output types.
        job.setReducerClass(WCReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // 5. Declare the input location.
        FileInputFormat.setInputPaths(job, new Path(inputPath));

        // 6. Declare the output location.
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // 7. Run the job and propagate its result as the process exit status,
        //    so that a failed job is not reported as success to the caller.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
package cn.tedu.wordcount;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer for the word-count job: sums the per-occurrence counts emitted for each word and
 * writes one {@code (word, total)} pair.
 */
public class WCReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    /**
     * Adds up all counts received for a single word and writes the total.
     *
     * @param key the word
     * @param values the counts emitted for {@code key}
     * @param context the context used to write the {@code (word, total)} pair
     * @throws IOException if writing the output fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values,
            Reducer<Text, LongWritable, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        // Accumulate the counts; the element type must be declared so that get() resolves.
        long count = 0;
        for (LongWritable value : values) {
            count += value.get();
        }
        // The key already holds the word, so it is written as-is.
        context.write(key, new LongWritable(count));
    }
}
package cn.tedu.wordcount;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable>{
public void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, LongWritable>.Context context) throws IOException, InterruptedException {
//获取行数据
String line = value.toString();
//切分为单词
String[] arr = line.split(" ");
//输出单词和数量
for (String word : arr) {
context.write(new Text(word), new LongWritable(1));
}
}
}