import java.io.IOException;
import java.util.LinkedHashSet;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Builds a simple inverted index: for each word in the input files,
 * emits a line listing the files that contain it.
 */
public class Index {
    private static class IndexMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // The input file name is available from the split; it is emitted as the
            // value so the reducer knows which document each word came from.
            String fileName = ((FileSplit) context.getInputSplit()).getPath().getName();
            System.out.println(fileName); // debug: appears in the task logs, not in the job output
            String[] strs = value.toString().trim().split(" ");
            for (int i = 0; i < strs.length; i++) {
                context.write(new Text(strs[i]), new Text(fileName));
            }
        }
    }
    private static class IndexReducer extends Reducer<Text, Text, NullWritable, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> datas, Context context) throws IOException, InterruptedException {
            // Values are not guaranteed to arrive grouped by file, so collect the
            // distinct base file names in an insertion-ordered set.
            Set<String> files = new LinkedHashSet<>();
            for (Text data : datas) {
                // Keep only the base name, e.g. "a.txt" -> "a".
                files.add(data.toString().split("[.]")[0]);
            }
            // Emit one line per word: "word:file1,file2,...".
            context.write(NullWritable.get(), new Text(key.toString() + ":" + String.join(",", files)));
        }
    }
public static void main(String[] args) {
try {
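// HadoopCfg is a project-local helper (not shown here); it is assumed
// to return a Configuration already pointing at the target cluster.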
Configuration cfg = HadoopCfg.getConfigration();
Job job = Job.getInstance(cfg);
job.setJobName("Index");
job.setJarByClass(Index.class);
job.setMapperClass(IndexMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setReducerClass(IndexReducer.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path("/input/index"));
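// Note: the output directory must not already exist, or the job fails at submit time.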
FileOutputFormat.setOutputPath(job, new Path("/index/"));
System.exit(job.waitForCompletion(true) ? 0 : 1);
} catch (Exception e) {
e.printStackTrace();
}
}
}
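/*
 * A minimal sketch of the expected behavior, assuming two hypothetical input
 * files under /input/index:
 *
 *   a.txt: "hello world"
 *   b.txt: "hello hadoop"
 *
 * The job would then write one line per word to /index/, e.g.:
 *
 *   hadoop:b
 *   hello:a,b
 *   world:a
 *
 * The reducer strips the ".txt" extension before joining the file names;
 * the order of file names on a line may vary, since reduce-side values
 * arrive in no guaranteed order.
 */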