输入文件（每个文件名后面是该文件的内容）:
a.txt
welcome tom
welcome tomm
welcome kitty
welcome james
welcome tom
b.txt
welcome james
welcome tom
welcome kitty
c.txt
welcome tom
welcome tomm
welcome james
// Provides a default Hadoop installation directory for local runs.
// Hadoop's shell utilities read the "hadoop.home.dir" system property to locate
// the installation (e.g. bin/winutils.exe on Windows).
static {
    // Only fall back to the development path when the caller has not already
    // supplied one (for example via -Dhadoop.home.dir=...), so the hard-coded
    // value no longer overrides an explicit setting.
    if (System.getProperty("hadoop.home.dir") == null) {
        System.setProperty("hadoop.home.dir", "E:/x3/hadoop-2.9.2");
    }
}
//map
/**
 * Map phase of the inverted index.
 *
 * <p>For every non-empty, tab-separated token of an input line this mapper emits
 * {@code <token, "fileName->1">}, where {@code fileName} is the name of the file
 * the current input split belongs to.
 */
public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

    // Output objects are reused for every record instead of allocating two
    // Text instances per token.
    private final Text outKey = new Text();
    private final Text outValue = new Text();

    /**
     * Resolves the source file name once per task and prepares the constant
     * output value {@code "fileName->1"}.
     *
     * @param context task context giving access to the input split
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // The cast requires a file-based input format (the split must be a FileSplit).
        String fileName = ((FileSplit) context.getInputSplit()).getPath().getName();
        outValue.set(fileName + "->1");
    }

    /**
     * Emits one posting per token of the line.
     *
     * @param key     key supplied by the input format (unused)
     * @param value   one line of input text
     * @param context sink for the {@code <token, "fileName->1">} pairs
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // NOTE(review): fields are split on TAB only; if input lines separate words
        // with spaces, each whole line becomes a single key - confirm the delimiter.
        for (String token : value.toString().split("\t")) {
            // Blank lines and consecutive tabs yield empty tokens; indexing the
            // empty string is meaningless, so skip them.
            if (token.isEmpty()) {
                continue;
            }
            outKey.set(token);
            context.write(outKey, outValue);
        }
    }
}
//reduce
/**
 * Reduce phase of the inverted index.
 *
 * <p>Receives, for one token, postings of the form {@code "fileName->count"} and
 * writes a single line {@code <token, "fileA->n,fileB->m,...">} holding the total
 * count per file.
 */
public static class MyReduce extends Reducer<Text, Text, Text, Text> {

    /** Separator between file name and count inside a posting. */
    private static final String SEPARATOR = "->";

    /**
     * Sums the counts per file for {@code key} and emits them as one comma-separated value.
     *
     * @param key     the indexed token
     * @param values  postings in {@code "fileName->count"} form
     * @param context sink for the aggregated {@code <token, postings>} pair
     * @throws IllegalArgumentException if a posting has no separator or a non-numeric count
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // e.g. <welcome, ["a.txt->1", "a.txt->1", "b.txt->1"]>
        Map<String, Integer> countsByFile = new HashMap<>();
        for (Text value : values) {
            String posting = value.toString();
            // Split at the LAST separator so a file name that itself contains "->"
            // is still parsed correctly.
            int sep = posting.lastIndexOf(SEPARATOR);
            if (sep < 0) {
                throw new IllegalArgumentException("Malformed posting (expected name->count): " + posting);
            }
            String fileName = posting.substring(0, sep);
            int count = Integer.parseInt(posting.substring(sep + SEPARATOR.length()));

            // Always add the parsed count; the previous code added a literal 1 for
            // every posting after the first, ignoring the posting's own count.
            Integer previous = countsByFile.get(fileName);
            countsByFile.put(fileName, previous == null ? count : previous + count);
        }

        // e.g. <welcome, "a.txt->2,b.txt->1">
        StringBuilder joined = new StringBuilder();
        for (Map.Entry<String, Integer> entry : countsByFile.entrySet()) {
            // Prepend the comma from the second entry on, so nothing has to be trimmed afterwards.
            if (joined.length() > 0) {
                joined.append(',');
            }
            joined.append(entry.getKey()).append(SEPARATOR).append(entry.getValue());
        }
        context.write(key, new Text(joined.toString()));
    }
}
/**
 * Configures and runs the "daopai" job, wiring {@code MyMapper} and {@code MyReduce}
 * together, and prints whether the job succeeded.
 *
 * @param args {@code args[0]} - input path(s), handed to
 *             {@code FileInputFormat.addInputPaths}; {@code args[1]} - output directory
 * @throws IllegalArgumentException if fewer than two arguments are given
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // Fail with a usage hint instead of a bare ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
        throw new IllegalArgumentException("Usage: <input path(s)> <output path>");
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "daopai");
    // Tell Hadoop which jar to ship to the cluster; the nested mapper class lives
    // in the same jar as this driver.
    job.setJarByClass(MyMapper.class);

    FileInputFormat.addInputPaths(job, args[0]);

    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setReducerClass(MyReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Blocks until the job finishes; true means it completed successfully.
    boolean result = job.waitForCompletion(true);
    System.out.println(result);
}
最终结果如下: