// Supplies a default Hadoop installation directory for local runs.
// Hadoop reads the "hadoop.home.dir" system property to locate its install
// (presumably needed here for the Windows native helpers under E:/ -- confirm).
static {
    // Only provide the default when the JVM was not started with an explicit
    // -Dhadoop.home.dir=...; otherwise the caller's setting would be silently
    // replaced by this machine-specific path.
    if (System.getProperty("hadoop.home.dir") == null) {
        System.setProperty("hadoop.home.dir", "E:/x3/hadoop-2.9.2");
    }
}
/**
 * Configures and runs the "invert-index-2" MapReduce job, then terminates the JVM.
 *
 * <p>Exit status: {@code 0} when the job completes successfully, {@code 1} when it
 * fails, {@code 2} when the command line is incomplete.
 *
 * @param args {@code args[0]} is the input path specification handed to
 *             {@code FileInputFormat.addInputPaths}; {@code args[1]} is the job
 *             output directory
 * @throws IOException            if job setup or submission fails
 * @throws ClassNotFoundException if a job class cannot be resolved while waiting for completion
 * @throws InterruptedException   if the thread is interrupted while waiting for the job
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    if (args.length < 2) {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        System.err.println("Usage: InvertIndex2 <input path(s)> <output path>");
        System.exit(2);
        return;
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "invert-index-2");
    job.setJarByClass(InvertIndex2.class);

    FileInputFormat.addInputPaths(job, args[0]);

    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setCombinerClass(MyCombiner.class);

    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    boolean succeeded = job.waitForCompletion(true);
    // Process convention: zero means success, non-zero means failure.
    System.exit(succeeded ? 0 : 1);
}
public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
String[] arr = line.split("\t");
String fileName = ((FileSplit) context.getInputSplit()).getPath().getName();
for (String word : arr) {
context.write(new Text(word + "_" + fileName), new Text("1"));
}
}
}
public static class MyCombiner extends Reducer<Text, Text, Text, Text> {
@Override
protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
Integer sum = 0;
for (Text value : values) {
sum += Integer.parseInt(value.toString());
}
String[] arr = key.toString().split("_");
context.write(new Text(arr[0]), new Text(arr[1] + "->" + sum));
}
}
public static class MyReducer extends Reducer<Text, Text, Text, Text> {
@Override
protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
Map<String, Integer> map = new HashMap<>();
for (Text value : values) {
String[] arr = value.toString().split("->");
if (map.get(arr[0]) == null) {
map.put(arr[0], Integer.parseInt(arr[1]));
} else {
map.put(arr[0], map.get(arr[0]) + Integer.parseInt(arr[1]));
}
}
StringBuffer sb = new StringBuffer();
for (Map.Entry<String, Integer> entry : map.entrySet()) {
sb.append(entry.getKey()).append("->").append(entry.getValue());
sb.append(",");
}
context.write(key, new Text(sb.substring(0, sb.length() - 1)));
}
}