hadoop解决一个图相关的题

最新推荐文章于 2012-01-19 11:13:08 发布

雨落

最新推荐文章于 2012-01-19 11:13:08 发布

阅读量834

点赞数

分类专栏： Hadoop 文章标签： hadoop string mapreduce c iterator class

本文链接：https://blog.csdn.net/anbo724/article/details/6775973

版权

Hadoop 专栏收录该内容

54 篇文章 0 订阅

订阅专栏

本题是来自于：http://caibinbupt.iteye.com/blog/354316

题目是这样的：

有向图，输入是所有的边，如下图的输入是<A, B>，<A, C>，<A, D>，<A, E>，<B, C>，<B, F>，<E, D>和<C, G>，输出是图中所有这样的三角形<x, <y,z>>，它的三条边是<x,y>，<x,z>和<y,z>。下面的图输出是<A, <B,C>>，<A, <E,D>>。用MapReduce做，不要用传统方法啦（比方说把边导到数据库里，用SQL语句找）。

下面给出对应的解决方案以及源代码：

解决方案：

要点就是中间结果需要附加一些信息，保证在求解的过程中，上下文的信息是充分的。利用这种方法，我们还可以用MapReduce做图上的宽度优先搜索。

代码：运行环境是eclipse+hadoop0.20.2

package an.hadoop.clique;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;



public class FindClique {
	
	public static class Mapper_Clique extends Mapper <LongWritable, Text, Text , Text>{
		private String source = new String();
		private String end = new String();
		
		public void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			//value 就是文件的内容
			String cont = value.toString(); //转化为字符串
			int index = cont.indexOf("\r\n");//回车换行符
			while(index != -1 ){
				String temp = cont.substring(0,index);
				int index1 = value.toString().indexOf(" ");//找到空格
				source = value.toString().substring(0, index1);
				end = value.toString().substring(index1+1, index1+2);
				cont = cont.substring(index+1);//去掉前面不用的部分
				context.write(new Text(source), new Text(end)); //map的输出
				index = cont.indexOf("\r\n");
			}
			if(cont.length() != 0){ //应该是最后面的一部分了
				int index1 = value.toString().indexOf(" ");//找到空格
				source = value.toString().substring(0, index1);
				end = value.toString().substring(index1+1, index1+2);
				context.write(new Text(source), new Text(end)); //map的输出
			}	
		}//map
	}//mapper
	
	// First-stage reducer: receives a source vertex together with all of its
	// target vertices (e.g. A with B, C, D, E) and writes a single record whose
	// value is the bracketed, comma-separated target list: A	<B,C,D,E>
	public static class Reduce_Clique extends Reducer<Text, Text, Text, Text> {
		public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
			StringBuilder joined = new StringBuilder("<");
			Iterator<Text> targets = values.iterator();
			while (targets.hasNext()) {
				// every target is followed by a comma; the last one is removed below
				joined.append(targets.next().toString()).append(",");
			}
			// drop the final character (the trailing comma) before closing the list
			joined.setLength(joined.length() - 1);
			joined.append(">");
			context.write(key, new Text(joined.toString()));
		}//reduce

	}//Reduce_Clique
	
	//尝试使用两个map、reduce函数试试
	/**
	 * 第二个map的输入值是
	 * A	<B,C,D,E>
	 * B	<C,F>
	 * C	<G>
	 * E	<D>
	 * 这样的形式，现在需要让其变成
	 * <B, <A,<B,C,D,E>>> 
		<C, <A,<B,C,D,E>>> 
		<D, <A,<B,C,D,E>>> 
		<E, <A,<B,C,D,E>>> 
	 * 这样的形式，然后传递给reduce函数，reduce得到的输入值是这样的形式
	 * <C, <A,<B,C,D,E>>> 
	<C, <B,<C,F>>> 
	 * 这样就可以知道<A,C>、<B,C>和<A,B>了 就可以得到结果<A,<B,C>>
	 * */
	public static class Mapper_2 extends Mapper <LongWritable, Text, Text , Text>{

		
		public void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			//value 就是文件的内容
			String cont = value.toString(); //转化为字符串
			int index = cont.indexOf("\r\n");//回车换行符
			while(index != -1 ){
				String temp = cont.substring(0,index); //取一行   A	<B,C,D,E>
				int index1 = temp.indexOf("	");
				String sour = temp.substring(0,index1);
				String val = temp.substring(index1+2);//直接去掉< 变成了B,C,D,E>这样的形式
				int index2 = val.indexOf(","); //在后面的B,C,D,E>，提取出B等数据
				while(index2 != -1){
					//String obj = "<";
					String obj = val.substring(0,index2);//获取一个B
					val = val.substring(index2+1);//去掉一个,
					//形成一个 B		A	<B,C,D,E> 形式的输出
					
					context.write(new Text(obj), new Text(temp)); //输出B	A	<B,C,D,E>， key 为B value为A	<B,C,D,E>
					index2 = val.indexOf(",");
					
				}//while(index2 != -1)
				if(index2==-1 && val.length() != 0){
					String obj = val.substring(0,val.length()-1);//获取一个B
					context.write(new Text(obj), new Text(temp)); //输出B	A	<B>， key 为B value为A	<B>
				}
				
				cont = cont.substring(index +1);
				index = cont.indexOf("\r\n");//回车换行符
				
			}//while(index != -1 ){
			if(cont.length() != 0){ //应该是最后面的一部分了
				
				String temp = cont; //取一行   A	<B,C,D,E>
				int index1 = temp.indexOf("	");
				String sour = temp.substring(0,index1);
				String val = temp.substring(index1+2);//直接去掉< 变成了B,C,D,E>这样的形式
				int index2 = val.indexOf(","); //在后面的<B,C,D,E>，提取出B等数据
				while(index2 != -1){
					//String obj = "<";
					String obj = val.substring(0,index2);//获取一个B
					val = val.substring(index2+1);//去掉一个,
					index2 = val.indexOf(","); //在后面的<B,C,D,E>，提取出B等数据
					//形成一个 B		A	<B,C,D,E> 形式的输出
					
					context.write(new Text(obj), new Text(temp)); //输出B	A	<B,C,D,E>， key 为B value为A	<B,C,D,E>
					
				}//while
				if(index2==-1 && val.length() != 0){
					String obj = val.substring(0,val.length()-1);//获取一个B
					context.write(new Text(obj), new Text(temp)); //输出B	A	<B>， key 为B value为A	<B>
				}
			}
			
			
		}//map
	}//mapper
	
	/* 这样的形式，然后传递给reduce函数，reduce得到的输入值是这样的形式
	 * C list(A	<B,C,D,E>)
		<C, <B,<C,F>>> 
	 * 这样就可以知道<A,C>、<B,C>和<A,B>了 就可以得到结果<A,<B,C>>
	 * */
	public static class Reduce_2 extends Reducer<Text, Text, Text, Text> {
		public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
			Map map = new HashMap<String , Set<String>>();
			
			String tempkey = key.toString(); //C
			
			for(Text val : values){
				Set set = new HashSet<String>();
				String cont = val.toString();//A	<B,C,D,E>
				int index = cont.indexOf("	");
				String sour = cont.substring(0,index);//A
				cont = cont.substring(index+2);//B,C,D,E>
				index = cont.indexOf(",");
				//下面将得到A list B,C,D,E的map结构
				while(index != -1){
					String end = cont.substring(0,index);
					if(!end.equals(tempkey)){//去掉c
						//map.put(sour, end); //<A,B>形式
						set.add(end);
					}
					
					cont =  cont.substring(index+1);
					index = cont.indexOf(",");
				}
				if(index == -1 && cont.length() != 0){
					String end = cont.substring(0,cont.length()-1);//E>
					set.add(end); //先添加到set中去
					//map.put(sour, end);
				} //if
				map.put(sour, set);
				//set.clear();//清空为下一次做准备
			}//for
			//上面得到了所有的map<sour,list(obj)>这样的结构
			//下面是遍历，然后求交叉的部分
			Iterator it1 = map.entrySet().iterator();
			while(it1.hasNext()){
				Map.Entry<String, Set<String>> entry1 = (Map.Entry<String, Set<String>>) it1.next();
				String key1 = entry1.getKey();
				Set<String > value1 = entry1.getValue();
				Iterator st = value1.iterator();//获得set的迭代器
				while(st.hasNext()){
					String val1 = st.next().toString();//map对应的set
					
					Iterator it2 = map.entrySet().iterator();
					while(it2.hasNext()){
						if(!it1.equals(it2)){
							Map.Entry<String, Set<String>> entry2 = (Map.Entry<String, Set<String>>) it2.next();
							String key2 = entry2.getKey();
							if(val1.equals(key2)){//找到相等的了
								//形成<A,<B,C>>的形式
								String result = "<";
								result += key1;
								result += ",<";
								result += key2;
								result += ",";
								result += tempkey;
								result += ">>";
								context.write(new Text("key"), new Text(result));
							}
						}
					}//内循环it1
				}//st
			}//while it
		}//reduce
	}//Reduce_2 
	
	/**
	 * Entry point: configures and runs the two chained jobs.
	 *
	 * <p>Job 1 reads the edge list from {@code <in>} and writes the per-vertex
	 * adjacency lists to the intermediate directory {@code tmp}; job 2 reads
	 * {@code tmp} and writes the triangles to {@code <out>}. The process exits
	 * with 2 on a usage error, 1 if either job fails and 0 on success.
	 *
	 * @param args generic Hadoop options followed by {@code <in> <out>}
	 * @throws Exception if job setup or execution fails
	 */
	public static void main(String[] args) throws Exception {

		Path tmp = new Path("tmp"); // intermediate directory between the two jobs

		Configuration conf = new Configuration();
		String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
		if (otherArgs.length != 2) {
			System.err.println("Usage: FindClique <in> <out>");
			System.exit(2);
		}

		Job job = new Job(conf, "FindClique");
		// main class, mapper and reducer of the first pass
		job.setJarByClass(FindClique.class);
		job.setMapperClass(Mapper_Clique.class);
		job.setReducerClass(Reduce_Clique.class);
		// key/value types of the map output and of the final output
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		//job.setPartitionerClass(MyPartitoner.class); // custom partitioner, currently unused
		// input and output paths
		FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
		FileOutputFormat.setOutputPath(job, tmp);
		// The second job consumes the output of the first one, so stop here if it failed.
		if (!job.waitForCompletion(true)) {
			System.err.println("FindClique: first job failed, second job not started");
			System.exit(1);
		}

		// Start from a copy of conf so the generic options parsed above apply to job 2 as well.
		Configuration conf2 = new Configuration(conf);
		Job job2 = new Job(conf2, "FindClique2");
		// main class, mapper and reducer of the second pass
		job2.setJarByClass(FindClique.class);
		job2.setMapperClass(Mapper_2.class);
		job2.setReducerClass(Reduce_2.class);
		// key/value types of the map output and of the final output
		job2.setMapOutputKeyClass(Text.class);
		job2.setMapOutputValueClass(Text.class);
		job2.setOutputKeyClass(Text.class);
		job2.setOutputValueClass(Text.class);
		// input and output paths
		FileInputFormat.setInputPaths(job2, tmp);
		FileOutputFormat.setOutputPath(job2, new Path(otherArgs[1]));

		System.exit(job2.waitForCompletion(true) ? 0 : 1);

	}
}