本题是来自于:http://caibinbupt.iteye.com/blog/354316
题目是这样的:
有向图,输入是所有的边,如下图的输入是<A, B>,<A, C>,<A, D>,<A, E>,<B, C>,<B, F>,<E, D>和<C, G>,输出是图中所有这样的三角形<x, <y,z>>:它的三条边是<x,y>,<x,z>和<y,z>。下图对应的输出是<A, <B,C>>,<A, <E,D>>。要求用MapReduce实现,不要用传统方法(比方说把边导入数据库里,再用SQL语句查找)。
下面给出对应的解决方案以及源代码:
解决方案:
要点就是中间结果需要附加一些信息,保证在解的过程中,上下文的信息是充分的。利用这种方法,我们还可以用MapReduce做图上的宽度优先搜索。
代码:运行环境是eclipse+hadoop0.20.2
package an.hadoop.clique;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class FindClique {
public static class Mapper_Clique extends Mapper <LongWritable, Text, Text , Text>{
private String source = new String();
private String end = new String();
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
//value 就是文件的内容
String cont = value.toString(); //转化为字符串
int index = cont.indexOf("\r\n");//回车换行符
while(index != -1 ){
String temp = cont.substring(0,index);
int index1 = value.toString().indexOf(" ");//找到空格
source = value.toString().substring(0, index1);
end = value.toString().substring(index1+1, index1+2);
cont = cont.substring(index+1);//去掉前面不用的部分
context.write(new Text(source), new Text(end)); //map的输出
index = cont.indexOf("\r\n");
}
if(cont.length() != 0){ //应该是最后面的一部分了
int index1 = value.toString().indexOf(" ");//找到空格
source = value.toString().substring(0, index1);
end = value.toString().substring(index1+1, index1+2);
context.write(new Text(source), new Text(end)); //map的输出
}
}//map
}//mapper
// The reducer receives one source vertex together with all of its targets,
// e.g. A -> B, C, D, E, and writes the vertex with its bracketed,
// comma-separated target list: A <B,C,D,E>
public static class Reduce_Clique extends Reducer<Text, Text, Text, Text> {
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        StringBuilder adjacency = new StringBuilder("<");
        Iterator<Text> targets = values.iterator();
        while (targets.hasNext()) {
            // every target is followed by a comma, including the last one
            adjacency.append(targets.next().toString());
            adjacency.append(",");
        }
        // remove the final character (the trailing comma) before closing the list
        adjacency.setLength(adjacency.length() - 1);
        adjacency.append(">");
        context.write(key, new Text(adjacency.toString()));
    }
}
//尝试使用两个map、reduce函数试试
/**
* 第二个map的输入值是
* A <B,C,D,E>
* B <C,F>
* C <G>
* E <D>
* 这样的形式,现在需要让其变成
* <B, <A,<B,C,D,E>>>
<C, <A,<B,C,D,E>>>
<D, <A,<B,C,D,E>>>
<E, <A,<B,C,D,E>>>
* 这样的形式,然后传递给reduce函数,reduce得到的输入值是这样的形式
* <C, <A,<B,C,D,E>>>
<C, <B,<C,F>>>
* 这样就可以知道<A,C>、<B,C>和<A,B>了 就可以得到结果<A,<B,C>>
* */
public static class Mapper_2 extends Mapper <LongWritable, Text, Text , Text>{
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
//value 就是文件的内容
String cont = value.toString(); //转化为字符串
int index = cont.indexOf("\r\n");//回车换行符
while(index != -1 ){
String temp = cont.substring(0,index); //取一行 A <B,C,D,E>
int index1 = temp.indexOf(" ");
String sour = temp.substring(0,index1);
String val = temp.substring(index1+2);//直接去掉< 变成了B,C,D,E>这样的形式
int index2 = val.indexOf(","); //在后面的B,C,D,E>,提取出B等数据
while(index2 != -1){
//String obj = "<";
String obj = val.substring(0,index2);//获取一个B
val = val.substring(index2+1);//去掉一个,
//形成一个 B A <B,C,D,E> 形式的输出
context.write(new Text(obj), new Text(temp)); //输出B A <B,C,D,E>, key 为B value为A <B,C,D,E>
index2 = val.indexOf(",");
}//while(index2 != -1)
if(index2==-1 && val.length() != 0){
String obj = val.substring(0,val.length()-1);//获取一个B
context.write(new Text(obj), new Text(temp)); //输出B A <B>, key 为B value为A <B>
}
cont = cont.substring(index +1);
index = cont.indexOf("\r\n");//回车换行符
}//while(index != -1 ){
if(cont.length() != 0){ //应该是最后面的一部分了
String temp = cont; //取一行 A <B,C,D,E>
int index1 = temp.indexOf(" ");
String sour = temp.substring(0,index1);
String val = temp.substring(index1+2);//直接去掉< 变成了B,C,D,E>这样的形式
int index2 = val.indexOf(","); //在后面的<B,C,D,E>,提取出B等数据
while(index2 != -1){
//String obj = "<";
String obj = val.substring(0,index2);//获取一个B
val = val.substring(index2+1);//去掉一个,
index2 = val.indexOf(","); //在后面的<B,C,D,E>,提取出B等数据
//形成一个 B A <B,C,D,E> 形式的输出
context.write(new Text(obj), new Text(temp)); //输出B A <B,C,D,E>, key 为B value为A <B,C,D,E>
}//while
if(index2==-1 && val.length() != 0){
String obj = val.substring(0,val.length()-1);//获取一个B
context.write(new Text(obj), new Text(temp)); //输出B A <B>, key 为B value为A <B>
}
}
}//map
}//mapper
/* 这样的形式,然后传递给reduce函数,reduce得到的输入值是这样的形式
* C list(A <B,C,D,E>)
<C, <B,<C,F>>>
* 这样就可以知道<A,C>、<B,C>和<A,B>了 就可以得到结果<A,<B,C>>
* */
public static class Reduce_2 extends Reducer<Text, Text, Text, Text> {
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
Map map = new HashMap<String , Set<String>>();
String tempkey = key.toString(); //C
for(Text val : values){
Set set = new HashSet<String>();
String cont = val.toString();//A <B,C,D,E>
int index = cont.indexOf(" ");
String sour = cont.substring(0,index);//A
cont = cont.substring(index+2);//B,C,D,E>
index = cont.indexOf(",");
//下面将得到A list B,C,D,E的map结构
while(index != -1){
String end = cont.substring(0,index);
if(!end.equals(tempkey)){//去掉c
//map.put(sour, end); //<A,B>形式
set.add(end);
}
cont = cont.substring(index+1);
index = cont.indexOf(",");
}
if(index == -1 && cont.length() != 0){
String end = cont.substring(0,cont.length()-1);//E>
set.add(end); //先添加到set中去
//map.put(sour, end);
} //if
map.put(sour, set);
//set.clear();//清空为下一次做准备
}//for
//上面得到了所有的map<sour,list(obj)>这样的结构
//下面是遍历,然后求交叉的部分
Iterator it1 = map.entrySet().iterator();
while(it1.hasNext()){
Map.Entry<String, Set<String>> entry1 = (Map.Entry<String, Set<String>>) it1.next();
String key1 = entry1.getKey();
Set<String > value1 = entry1.getValue();
Iterator st = value1.iterator();//获得set的迭代器
while(st.hasNext()){
String val1 = st.next().toString();//map对应的set
Iterator it2 = map.entrySet().iterator();
while(it2.hasNext()){
if(!it1.equals(it2)){
Map.Entry<String, Set<String>> entry2 = (Map.Entry<String, Set<String>>) it2.next();
String key2 = entry2.getKey();
if(val1.equals(key2)){//找到相等的了
//形成<A,<B,C>>的形式
String result = "<";
result += key1;
result += ",<";
result += key2;
result += ",";
result += tempkey;
result += ">>";
context.write(new Text("key"), new Text(result));
}
}
}//内循环it1
}//st
}//while it
}//reduce
}//Reduce_2
/**
 * Entry point: configures and runs the two chained jobs.
 *
 * <p>Job 1 ({@code Mapper_Clique} / {@code Reduce_Clique}) reads the edge list
 * from {@code <in>} and writes adjacency records to the relative directory
 * {@code tmp}. Job 2 ({@code Mapper_2} / {@code Reduce_2}) reads {@code tmp}
 * and writes the triangles to {@code <out>}. Job 2 is started only if job 1
 * succeeded.
 *
 * <p>Exit status: 0 on success, 1 if either job fails, 2 on wrong usage.
 *
 * @param args generic Hadoop options followed by {@code <in> <out>}
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    Path tmp = new Path("tmp"); // directory holding the intermediate output of job 1
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: FindClique <in> <out>");
        System.exit(2);
    }

    // ---- Job 1: edge list -> adjacency records ----
    Job job = new Job(conf, "FindClique");
    job.setJarByClass(FindClique.class);
    job.setMapperClass(Mapper_Clique.class);
    job.setReducerClass(Reduce_Clique.class);
    // key/value types of the map output and of the final output
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    //job.setPartitionerClass(MyPartitoner.class); // use a custom partitioner
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, tmp);
    // Job 2 consumes the output of job 1, so stop here if job 1 did not succeed.
    if (!job.waitForCompletion(true)) {
        System.err.println("FindClique: first job failed, second job not started");
        System.exit(1);
    }

    // ---- Job 2: adjacency records -> triangles ----
    // Built from the same parsed configuration so that generic options given
    // on the command line apply to both jobs.
    Job job2 = new Job(conf, "FindClique2");
    job2.setJarByClass(FindClique.class);
    job2.setMapperClass(Mapper_2.class);
    job2.setReducerClass(Reduce_2.class);
    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(Text.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(job2, tmp);
    FileOutputFormat.setOutputPath(job2, new Path(otherArgs[1]));
    System.exit(job2.waitForCompletion(true) ? 0 : 1);
}
}