准备数据:
hello world of tom1
hello world of tom1
hello world of tom2
hello world of tom3
hello world of tom3
hello world of tom4
hello world of tom4
思路分析图:
1、Mapper1(切割单词)
2、Mapper2(滤掉单词中的 of)
3、Mapper2_2(滤掉单词中带tom的)
4、Mapper3(reduce 的输出是 Mapper3 的输入,滤掉单词数量少于1=次的)
1、Mapper1(切割单词)
package hadoop.mr.chain;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
*
*/
public class Mapper1 extends Mapper<LongWritable,Text,Text,IntWritable>{
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
System.out.println("map1 : " + value.toString());
String line = value.toString();
String[] arr = line.split(" ");
for(String w : arr){
context.write(new Text(w),new IntWritable(1));
}
}
}
package hadoop.mr.