Giraph调试奇怪问题记录

最新推荐文章于 2018-10-03 17:15:45 发布

飞火流云

最新推荐文章于 2018-10-03 17:15:45 发布

阅读量314

点赞数

本文链接：https://blog.csdn.net/cloudeagle_bupt/article/details/76945963

版权

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.giraph.examples;

import org.apache.giraph.graph.BasicComputation;
import org.apache.giraph.graph.DefaultVertex;
import org.apache.giraph.graph.GraphTaskManager;
import org.apache.giraph.edge.ArrayListEdges;
import org.apache.giraph.edge.Edge;
import org.apache.giraph.edge.EdgeFactory;
import org.apache.giraph.edge.EdgeNoValue;
import org.apache.giraph.graph.Vertex;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;

import java.io.IOException;
import java.util.ArrayList;

/**
 * Four-superstep computation that rebuilds each vertex's edge list from the
 * ids it receives as messages. This variant logs every step for debugging.
 *
 * <ol>
 *   <li>Superstep 0: every vertex sends its own id along all of its
 *       out-edges.</li>
 *   <li>Superstep 1: every vertex empties its out-edge container and stores
 *       the received sender ids as its in-edge list.</li>
 *   <li>Superstep 2: every vertex sends its id to each stored
 *       in-neighbour.</li>
 *   <li>Superstep 3: every vertex replaces its edges with one edge per
 *       received id and votes to halt.</li>
 * </ol>
 *
 * NOTE(review): the original design notes describe partitions publishing
 * their vertex counts to ZooKeeper and renumbering vertex ids before
 * superstep 2. No such renumbering is visible in this class - confirm where
 * (or whether) it happens.
 *
 * NOTE(review): this class uses {@code DefaultVertex.inEdges},
 * {@code ArrayListEdges.printEdges()} and {@code GraphTaskManager.LOG};
 * these look like local modifications to Giraph rather than stock API -
 * confirm against the project's Giraph build.
 */
// NOTE(review): the annotation metadata below still describes the connected
// components example this class was derived from; it is left unchanged here
// because other tooling may match on the name.
@Algorithm(
    name = "Connected components",
    description = "Finds connected components of the graph"
)
public class RecodeComputation extends
    BasicComputation<IntWritable, IntWritable, NullWritable, IntWritable> {

  /**
   * Dispatches to the handler for the current superstep. Supersteps other
   * than 0-3 do nothing.
   *
   * @param vertex   the vertex being computed
   * @param messages ids sent to this vertex in the previous superstep
   * @throws IOException declared by the overridden Giraph method
   */
  @Override
  public void compute(
      Vertex<IntWritable, IntWritable, NullWritable> vertex,
      Iterable<IntWritable> messages) throws IOException {
    final long superstep = getSuperstep();
    if (superstep == 0) {
      announceIdToOutNeighbours(vertex);
    } else if (superstep == 1) {
      collectInEdges(vertex, messages);
    } else if (superstep == 2) {
      sendIdToInNeighbours(vertex);
    } else if (superstep == 3) {
      rebuildEdgesAndHalt(vertex, messages);
    }
  }

  /** Superstep 0: send this vertex's id to every out-edge target. */
  private void announceIdToOutNeighbours(
      Vertex<IntWritable, IntWritable, NullWritable> vertex) {
    GraphTaskManager.LOG.info("Vertex " + vertex.toString());
    sendMessageToAllEdges(vertex, vertex.getId());
  }

  /**
   * Superstep 1: empty the out-edge container, then store every received
   * sender id in the vertex's in-edge list.
   */
  private void collectInEdges(
      Vertex<IntWritable, IntWritable, NullWritable> vertex,
      Iterable<IntWritable> messages) {
    ArrayListEdges<IntWritable, NullWritable> outEdges =
        (ArrayListEdges<IntWritable, NullWritable>) vertex.getEdges();
    outEdges.clear();

    ArrayList<IntWritable> inEdges = inEdgesOf(vertex);
    for (IntWritable srcId : messages) {
      GraphTaskManager.LOG.info("Received Messags : " + srcId.toString());
      // Giraph may reuse the message object between iterations, so store a
      // copy of the value instead of the reference.
      inEdges.add(new IntWritable(srcId.get()));
    }

    StringBuilder joined = new StringBuilder();
    for (IntWritable inEdge : inEdges) {
      GraphTaskManager.LOG.info("inEdge " + inEdge.toString());
      joined.append(inEdge.toString());
    }

    GraphTaskManager.LOG.info("New Edges " + vertex.toString()
        + joined.toString() + " at step " + getSuperstep());
  }

  /** Superstep 2: send this vertex's id to every stored in-neighbour. */
  private void sendIdToInNeighbours(
      Vertex<IntWritable, IntWritable, NullWritable> vertex) {
    for (IntWritable srcId : inEdgesOf(vertex)) {
      GraphTaskManager.LOG.info("Send New id : " + vertex.toString()
          + " to " + srcId + " at step " + getSuperstep());
      sendMessage(srcId, vertex.getId());
    }
  }

  /**
   * Superstep 3: replace the vertex's edges with one edge per received id,
   * then vote to halt.
   */
  private void rebuildEdgesAndHalt(
      Vertex<IntWritable, IntWritable, NullWritable> vertex,
      Iterable<IntWritable> messages) {
    ArrayListEdges<IntWritable, NullWritable> edges =
        (ArrayListEdges<IntWritable, NullWritable>) vertex.getEdges();
    edges.clear();
    for (IntWritable srcId : messages) {
      // Copy the value: adding srcId itself would store the same reused
      // message object for every edge.
      edges.add(EdgeFactory.createReusable(new IntWritable(srcId.get())));
    }
    GraphTaskManager.LOG.info("new Edges " + vertex.toString()
        + edges.printEdges() + " at step " + getSuperstep());
    vertex.voteToHalt();
  }

  /**
   * Returns the in-edge list stored on the vertex.
   *
   * The raw cast is confined to this helper because the declaration of
   * {@code DefaultVertex.inEdges} is not visible from this file.
   */
  @SuppressWarnings({ "rawtypes", "unchecked" })
  private static ArrayList<IntWritable> inEdgesOf(
      Vertex<IntWritable, IntWritable, NullWritable> vertex) {
    return ((DefaultVertex) vertex).inEdges;
  }
}

自定义Giraph1.1.0的重编码计算类，其中超步3中如果这样写：

for (IntWritable srcId : messages) {
 		   ((ArrayListEdges)vertex.getEdges()).add(EdgeFactory.createReusable(srcId)) ;

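在以上写法中，会造成迭代中值重复。当时原因还没想明白；实际上是因为 Giraph 在遍历 messages 时会复用同一个 IntWritable 对象，每次调用 next() 只是把下一条消息的值写入这个对象。直接保存 srcId 的引用，相当于把同一个对象加入了多次，所以最终所有边的值都相同。因此需要先用 new IntWritable(srcId.get()) 复制一份再保存。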

修改后的重编码类：

/**
 * Four-superstep computation that rebuilds each vertex's edge list from the
 * ids it receives as messages.
 *
 * <ol>
 *   <li>Superstep 0: every vertex sends its own id along all of its
 *       out-edges.</li>
 *   <li>Superstep 1: every vertex stores the received sender ids as its
 *       in-edge list.</li>
 *   <li>Superstep 2: every vertex sends its id to each stored
 *       in-neighbour.</li>
 *   <li>Superstep 3: every vertex replaces its edges with one edge per
 *       received id and votes to halt.</li>
 * </ol>
 *
 * NOTE(review): the original design notes describe partitions publishing
 * their vertex counts to ZooKeeper and renumbering vertex ids before
 * superstep 2. No such renumbering is visible in this class - confirm where
 * (or whether) it happens.
 *
 * NOTE(review): {@code DefaultVertex.inEdges} looks like a local
 * modification to Giraph rather than stock API - confirm against the
 * project's Giraph build.
 */
public class RecodeComputation extends
    BasicComputation<IntWritable, IntWritable, NullWritable, IntWritable> {

  /**
   * Dispatches to the handler for the current superstep. Supersteps other
   * than 0-3 do nothing.
   *
   * @param vertex   the vertex being computed
   * @param messages ids sent to this vertex in the previous superstep
   * @throws IOException declared by the overridden Giraph method
   */
  @Override
  public void compute(
      Vertex<IntWritable, IntWritable, NullWritable> vertex,
      Iterable<IntWritable> messages) throws IOException {
    final long superstep = getSuperstep();
    if (superstep == 0) {
      // Tell every out-edge target who points at it.
      sendMessageToAllEdges(vertex, vertex.getId());
    } else if (superstep == 1) {
      storeInEdges(vertex, messages);
    } else if (superstep == 2) {
      sendIdToInNeighbours(vertex);
    } else if (superstep == 3) {
      rebuildEdgesAndHalt(vertex, messages);
    }
  }

  /** Superstep 1: store every received sender id in the in-edge list. */
  private void storeInEdges(
      Vertex<IntWritable, IntWritable, NullWritable> vertex,
      Iterable<IntWritable> messages) {
    ArrayList<IntWritable> inEdges = inEdgesOf(vertex);
    for (IntWritable srcId : messages) {
      // Giraph may reuse the message object between iterations, so store a
      // copy of the value instead of the reference.
      inEdges.add(new IntWritable(srcId.get()));
    }
  }

  /** Superstep 2: send this vertex's id to every stored in-neighbour. */
  private void sendIdToInNeighbours(
      Vertex<IntWritable, IntWritable, NullWritable> vertex) {
    for (IntWritable srcId : inEdgesOf(vertex)) {
      sendMessage(srcId, vertex.getId());
    }
  }

  /**
   * Superstep 3: replace the vertex's edges with one edge per received id,
   * then vote to halt.
   */
  private void rebuildEdgesAndHalt(
      Vertex<IntWritable, IntWritable, NullWritable> vertex,
      Iterable<IntWritable> messages) {
    ArrayListEdges<IntWritable, NullWritable> edges =
        (ArrayListEdges<IntWritable, NullWritable>) vertex.getEdges();
    edges.clear();
    for (IntWritable srcId : messages) {
      // Copy the value: adding srcId itself would store the same reused
      // message object for every edge.
      edges.add(EdgeFactory.createReusable(new IntWritable(srcId.get())));
    }
    vertex.voteToHalt();
  }

  /**
   * Returns the in-edge list stored on the vertex.
   *
   * The raw cast is confined to this helper because the declaration of
   * {@code DefaultVertex.inEdges} is not visible from this file.
   */
  @SuppressWarnings({ "rawtypes", "unchecked" })
  private static ArrayList<IntWritable> inEdgesOf(
      Vertex<IntWritable, IntWritable, NullWritable> vertex) {
    return ((DefaultVertex) vertex).inEdges;
  }
}

飞火流云

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Giraph调试奇怪问题记录

/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding c
复制链接

扫一扫