package cn.kgc.kb09.mr;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class WCMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    // Reused output objects — Hadoop calls map() once per input record, so
    // allocating fresh writables each time creates needless GC pressure.
    private final Text outKey = new Text();
    private static final IntWritable ONE = new IntWritable(1);

    /**
     * Tokenizes one line of input text and emits a (word, 1) pair per word.
     *
     * @param key     byte offset of the line within the input split (unused)
     * @param value   one line of the input file
     * @param context sink for the (Text, IntWritable) map output
     * @throws IOException          if the framework fails to write output
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        // \\s+ collapses runs of whitespace (tabs, multiple spaces); the
        // original split(" ") emitted empty tokens between consecutive spaces,
        // which would have been counted as an empty "word".
        String[] words = line.split("\\s+");
        for (String word : words) {
            if (word.isEmpty()) {
                continue; // leading whitespace yields one empty leading token
            }
            outKey.set(word);
            context.write(outKey, ONE);
        }
    }
}
package cn.kgc.kb09.mr;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    // Reused output value — avoids allocating one IntWritable per distinct key.
    private final IntWritable result = new IntWritable();

    /**
     * Sums the partial counts for a single word and emits (word, total).
     *
     * @param key     the word
     * @param values  all counts emitted by the mappers (and any combiner) for this word
     * @param context sink for the (Text, IntWritable) reduce output
     * @throws IOException          if the framework fails to write output
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable value : values) {
            total += value.get();
        }
        result.set(total);
        context.write(key, result);
    }
}
package cn.kgc.kb09.mr;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class WCDriver {
    /**
     * Configures and submits the word-count job, then exits with 0 on
     * success or 1 on failure.
     *
     * <p>Input and output paths may be supplied as the first and second
     * command-line arguments; when omitted, the original hard-coded local
     * paths are used, so existing invocations keep working.
     *
     * @param args optional: args[0] = input path, args[1] = output path
     * @throws IOException            if job setup or submission fails
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if waiting for completion is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Generalized: paths come from the command line, falling back to the
        // previous hard-coded defaults for backward compatibility.
        String inputPath = args.length >= 1 ? args[0] : "file:///E:/test/a.txt";
        String outputPath = args.length >= 2 ? args[1] : "file:///E:/test/result";
        // 1. Create the job.
        Configuration cfg = new Configuration();
        Job job = Job.getInstance(cfg, "job_wc");
        job.setJarByClass(WCDriver.class);
        // 2. Wire up the mapper and reducer.
        job.setMapperClass(WCMapper.class);
        job.setReducerClass(WCReducer.class);
        // Map output types (must match WCMapper's generic parameters).
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Final (reducer) output types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Input/output locations.
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        // 3. Run and report.
        boolean result = job.waitForCompletion(true);
        System.out.println(result ? "成功" : "失败");
        System.exit(result ? 0 : 1);
    }
}
示例输入文件 a.txt 的内容如下：
作业运行后的输出结果如下图所示：